Abstract
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur eget porta erat. Morbi consectetur est vel gravida pretium. Suspendisse ut dui eu ante cursus gravida non sed sem. Nullam sapien tellus, commodo id velit id, eleifend volutpat quam. Phasellus mauris velit, dapibus finibus elementum vel, pulvinar non tellus. Nunc pellentesque pretium diam, quis maximus dolor faucibus id. Nunc convallis sodales ante, ut ullamcorper est egestas vitae. Nam sit amet enim ultrices, ultrices elit pulvinar, volutpat risus.
Data comes from three sources:

* DOAJ
* OpenEditors
* Australian ERA journal evaluation list
OpenEditors data is cleaned for:

* character sets
* white space
* missing ISSNs in the editors table, which are recovered by matching to other databases on publisher and title. Of XXX total journals in OpenEditors, YYY are matched.
### Join editors with journal information
# Work on a copy of the cleaned editor table; outside debug mode the raw
# inputs are removed from the environment (presumably to free memory).
editors_join.tbl <- editors_clean.tbl
if (!doc_debug) {
  rm("editors_raw.tbl", "era_raw.tbl")
}

# 1. Attach the DOAJ "Journal license" column by ISSN and rename it `license`.
# 2. Missing licence values (including editors whose ISSN has no DOAJ row)
#    become "none"; IND_openlicense is TRUE for every other licence value.
# 3. Editors with an empty ISSN cannot be matched, so their indicator is NA.
# 4. Attach the ERA `subjects` column by ISSN.
# The table is left row-wise, exactly as the step-by-step version left it.
editors_join.tbl <- editors_join.tbl %>%
  left_join(
    doaj.tbl %>% select(issn, "Journal license"),
    by = "issn"
  ) %>%
  rename(license = "Journal license") %>%
  rowwise() %>%
  mutate(
    license = ifelse(is.na(license), "none", license),
    IND_openlicense = (license != "none")
  ) %>%
  mutate(IND_openlicense = ifelse(issn == "", NA, IND_openlicense)) %>%
  left_join(
    era.tbl %>% select(issn, subjects),
    by = c("issn")
  )
### extract given names for gender analysis
# Work on a copy of the joined table; outside debug mode the previous
# stage's table is removed from the environment.
editors_parse.tbl <- editors_join.tbl
if (!doc_debug) {
  rm("editors_clean.tbl")
}

# first_name() fails on an empty string, so wrap it to yield "" on error.
safe_first_name <- possibly(first_name, otherwise = "")

# Row by row:
#  - LS_FULLNAME: the editor name with the first matching honorific removed
#    (str_replace replaces one match only) and whitespace squished;
#  - LS_GIVENNAME: the given name parsed from LS_FULLNAME;
#  - post-cleanup: an empty result, a single letter, or a token containing a
#    period or comma (the pattern is unanchored, so it matches anywhere in
#    the token) is treated as a surname or abbreviation and set to NA.
editors_parse.tbl <- editors_parse.tbl %>%
  rowwise() %>%
  mutate(
    LS_FULLNAME = str_squish(
      str_replace(
        editor,
        "(Dr\\.)|(Prof\\.)|(Doctor)|(Professor)|(Dr )|(Prof )",
        ""
      )
    )
  ) %>%
  mutate(LS_GIVENNAME = safe_first_name(LS_FULLNAME)) %>%
  mutate(
    LS_GIVENNAME = case_when(
      LS_GIVENNAME == "" ~ NA_character_,
      str_length(LS_GIVENNAME) == 1 ~ NA_character_,
      str_detect(LS_GIVENNAME, ".*(\\.|\\,)") ~ NA_character_,
      TRUE ~ LS_GIVENNAME
    )
  )
### extract country using geotext
# Work on a copy of the name-parsed table; outside debug mode the earlier
# joined table is removed from the environment.
editors_parse_c.tbl <- editors_parse.tbl
if (!doc_debug) {
  rm("editors_join.tbl")
}

## setup geotext
# Install the Python `geotext` package only when both flags are set.
# `&&` is the scalar, short-circuiting operator intended for `if` conditions;
# the elementwise `&` would silently build a vector if either flag ever had
# length > 1.
if (doc_refresh_data && doc_debug) {
  py_install(packages = "geotext")
}
# Build an error-tolerant, list-applying wrapper around one member of a
# Python module.
#
# module    - name of the Python module, passed to import().
# importfun - name of the member to pull out of the imported module.
#
# Returns (invisibly) a function(xlist, ...) that applies the wrapped member
# to every element of `xlist` via sapply(), forwarding `...`; any call that
# raises an error yields NA instead of aborting.
wrap_python <- function(module, importfun) {
  py_module <- import(module)
  guarded_call <- possibly(py_module[importfun], otherwise = NA)
  apply_over_list <- function(xlist, ...) {
    sapply(
      xlist, guarded_call, ...,
      simplify = TRUE, USE.NAMES = FALSE
    )
  }
  # Returned invisibly, matching the value of a trailing assignment.
  invisible(apply_over_list)
}
# Error-tolerant wrapper around the Python GeoText member of `geotext`.
geotext <- wrap_python("geotext", "GeoText")

## geotext and check against naive parsing
# One row per distinct affiliation string (first row of each group).
affiliations.tbl <- editors_parse_c.tbl %>%
  group_by(affiliation) %>%
  slice_head(n = 1) %>%
  ungroup() %>%
  select(affiliation)

# Row by row:
#  - LS_COUNTRY_CHK:  text after the last comma of the affiliation
#                     (naive country guess, not trimmed);
#  - LS_COUNTRY_G:    first name of the "country_mentions" element of the
#                     GeoText result for the title-cased guess
#                     (NOTE(review): presumably a two-letter country code;
#                     confirm against the geotext package);
#  - LS_COUNTRY_CHK2: title-cased guess used by the fallback rules;
#  - LS_COUNTRY:      the GeoText result when available, otherwise the first
#                     matching hand-written rule below, otherwise "".
affiliations.tbl <- affiliations.tbl %>%
  rowwise() %>%
  mutate(
    LS_COUNTRY_CHK = tail(unlist(str_split(affiliation, ",")), n = 1)
  ) %>%
  mutate(
    LS_COUNTRY_G =
      names(geotext(str_to_title(LS_COUNTRY_CHK))[[1]]["country_mentions"])[1]
  ) %>%
  mutate(
    LS_COUNTRY_CHK2 = str_to_title(LS_COUNTRY_CHK),
    LS_COUNTRY = case_when(
      !is.na(LS_COUNTRY_G)                      ~ LS_COUNTRY_G,
      str_detect(LS_COUNTRY_CHK, "USA")         ~ "US",
      str_detect(LS_COUNTRY_CHK, "UK")          ~ "GB",
      str_detect(LS_COUNTRY_CHK2, "Netherlands") ~ "NL",
      str_detect(LS_COUNTRY_CHK2, "Russia")     ~ "RU",
      str_detect(LS_COUNTRY_CHK2, "Viet Nam")   ~ "VN",
      str_detect(LS_COUNTRY_CHK2, "Korea")      ~ "KR",
      str_detect(LS_COUNTRY_CHK2, "Emirates")   ~ "AE",
      str_detect(LS_COUNTRY_CHK, "UAE")         ~ "AE",
      str_detect(LS_COUNTRY_CHK, "CHN")         ~ "CN",
      str_detect(LS_COUNTRY_CHK2, "Brasil")     ~ "BR",
      str_detect(LS_COUNTRY_CHK2, "Scotland")   ~ "GB",
      str_detect(LS_COUNTRY_CHK2, "Singapore")  ~ "SG",
      str_detect(LS_COUNTRY_CHK2, "Trinidad")   ~ "TT",
      str_detect(LS_COUNTRY_CHK, "KSA")         ~ "SA",
      # last resort: look for "Korea" anywhere in the full affiliation
      str_detect(affiliation, "Korea")          ~ "KR",
      TRUE                                      ~ ""
    )
  )
editors_parse_c.tbl %<>% left_join(affiliations.tbl %>% select("affiliation","LS_COUNTRY"), by=c("affiliation"))

Making scholarship more inclusive requires making the characteristics of those participating visible. Because no systematic public data on self-reported author characteristics exists, however, research on participation in scholarly publications must use bibliometric methods to impute gender from author names (see, for example, [@lariviere2013bibliometrics]). To impute the gender of editors, we apply a method that is commonly used in scientometric analysis and is based on the analysis of historical censuses [@blevins2015jane]. We then use this imputation to explore the inclusion of works authored by men and women over time.^1^
### impute gender based on name
# Method name handed to gender() through safer_gender().
gender_meth <- "ipums"
#TODO:
# - multiple methods analysis
# - fix genderizer timeouts

# Look up the imputed gender for one name and always return a single string.
#
# x   - the name to look up (first argument of gender()).
# ... - forwarded to gender(), e.g. `method = gender_meth`.
#
# Returns the first "gender" value reported by gender(), or "" when the
# lookup raises an error (gender can fail on the genderize method), yields
# no value at all, or yields NA.
safer_gender <- function(x, ...) {
  safe_gender <- possibly(gender, otherwise = list(gender = ""))
  rv <- safe_gender(x, ...)[["gender"]]
  # Test the length first: is.na() on a zero-length value is logical(0), and
  # on a multi-element value it is a vector that `||` rejects in R >= 4.3.
  if (length(rv) == 0) {
    return("")
  }
  # Row-wise callers need a scalar; keep the first value if several return.
  rv <- rv[[1L]]
  if (is.na(rv)) {
    return("")
  }
  rv
}
# NOTE(review): editors_full.tbl is not assigned anywhere in the visible
# code; presumably `editors_full.tbl <- editors_parse_c.tbl` happens in a
# chunk that is not shown -- confirm before running this section on its own.

# Impute gender once per distinct given name (most frequent names first)
# rather than once per editor row.
nms.tbl <- editors_full.tbl %>%
  count(LS_GIVENNAME) %>%
  arrange(desc(n)) %>%
  rowwise() %>%
  mutate(LS_GENDER = safer_gender(LS_GIVENNAME, method = gender_meth))

# Copy the imputed gender back onto every editor row.
editors_full.tbl <- editors_full.tbl %>%
  left_join(nms.tbl %>% select(-n), by = c("LS_GIVENNAME"))

# Persist the enriched editor table.
edcsv.file <- "editors_full.csv.gz"
write_csv(editors_full.tbl, edcsv.file)

if (!doc_debug) {
  rm("editors_parse_c.tbl")
}

# In debug mode, reload the table from the file just written, forcing
# `subjects` to be read as character.
if (doc_debug) {
  editors_full.tbl <- read_csv(
    edcsv.file,
    col_types = list(subjects = col_character())
  )
}
# Editor-level analysis table.
#  - FAC_ROLE: CAT_ROLE as an ordered factor, review < editor < chief
#              (any other role value becomes NA);
#  - IND_MALE: TRUE when the imputed gender is "male", NA when it is
#              "either", "" or missing, FALSE otherwise.
# Only the analysis columns are kept, under their analysis names, and any
# grouping / row-wise state is dropped at the end.
editors_analysis.tbl <- editors_full.tbl %>%
  mutate(
    FAC_ROLE = factor(
      CAT_ROLE,
      levels = c("review", "editor", "chief"),
      ordered = TRUE
    ),
    IND_MALE = na_if(na_if(LS_GENDER, "either"), "") == "male"
  ) %>%
  select(
    NM_JOURNAL = journal,
    CAT_PUBLISHER = publisher,
    IND_MALE,
    IND_OPEN = IND_openlicense,
    LS_COUNTRY,
    LS_SUBJECTS = subjects,
    FAC_ROLE
  ) %>%
  ungroup()
### construct editorial board characteristics
# One row per journal x role group.
#  - CAT_PUBLISHER / IND_OPEN: taken from the first editor row of the group.
#  - LIST_SUBJECTS: distinct subject codes from the first row's
#    comma-separated LS_SUBJECTS. str_split() returns a one-element list, so
#    unique() has to be applied to the vector inside it; applied to the list
#    itself it never removes duplicate codes.
#  - N_SUBJECTS: number of distinct subject codes; "MD" is scored as 4
#    (ERA counts 3 subjects, plus "MD" for multidisciplinary); NA when the
#    journal has no subjects.
#  - LIST_ROLEGROUP_COUNTRIES: known editor countries in the group. Unknown
#    countries are coded "" upstream, so they are converted to NA before
#    na.omit(); otherwise "" would be counted as a country.
#  - N_ROLEGROUP_COUNTRIES: number of distinct known countries, NA when none.
#  - PERCENT_ROLEGROUP_MALE: share (0-1) of editors imputed male among those
#    with an imputed gender; NaN when no editor in the group has one.
journal_board_analysis.tbl <- editors_analysis.tbl %>%
  group_by(NM_JOURNAL, FAC_ROLE) %>%
  summarise(
    CAT_PUBLISHER = head(CAT_PUBLISHER, n = 1),
    LIST_SUBJECTS = list(
      unique(str_split(head(LS_SUBJECTS, n = 1), ",")[[1]])
    ),
    N_SUBJECTS = ifelse(
      head(LS_SUBJECTS, n = 1) == "MD",
      4,
      length(unlist(LIST_SUBJECTS))
    ),
    IND_OPEN = head(IND_OPEN, n = 1),
    LIST_ROLEGROUP_COUNTRIES = list(na.omit(na_if(LS_COUNTRY, ""))),
    N_ROLEGROUP_COUNTRIES = length(unique(unlist(LIST_ROLEGROUP_COUNTRIES))),
    N_ROLEGROUP_COUNTRIES = na_if(N_ROLEGROUP_COUNTRIES, 0),
    PERCENT_ROLEGROUP_MALE = mean(IND_MALE, na.rm = TRUE)
  ) %>%
  ungroup()
# Journal-level table: drop the role-group-specific columns, keep the first
# role-group row of each journal, then drop the journal name itself.
journal_analysis.tbl <- journal_board_analysis.tbl %>%
  select(
    -N_ROLEGROUP_COUNTRIES,
    -PERCENT_ROLEGROUP_MALE,
    -LIST_ROLEGROUP_COUNTRIES
  ) %>%
  group_by(NM_JOURNAL) %>%
  slice_head(n = 1) %>%
  ungroup() %>%
  select(-NM_JOURNAL)

# Describe every remaining column except the list column.
journal_analysis.tbl %>%
  select(-LIST_SUBJECTS) %>%
  Desc()
## ------------------------------------------------------------------------------
## Describe . (tbl_df, tbl, data.frame):
##
## data frame: 6080 obs. of 4 variables
## 3598 complete cases (59.2%)
##
## Nr ColName Class NAs Levels
## 1 FAC_ROLE ordered, factor 45 (0.7%) (3): 1-review,
## 2-editor, 3-chief
## 2 CAT_PUBLISHER character .
## 3 N_SUBJECTS numeric 2468 (40.6%)
## 4 IND_OPEN logical 1289 (21.2%)
##
##
## ------------------------------------------------------------------------------
## 1 - FAC_ROLE (ordered, factor)
##
## length n NAs unique levels dupes
## 6'080 6'035 45 3 3 y
## 99.3% 0.7%
##
## level freq perc cumfreq cumperc
## 1 review 5'129 85.0% 5'129 85.0%
## 2 editor 872 14.4% 6'001 99.4%
## 3 chief 34 0.6% 6'035 100.0%
## ------------------------------------------------------------------------------
## 2 - CAT_PUBLISHER (character)
##
## length n NAs unique levels dupes
## 6'080 6'080 0 17 17 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 Elsevier 2'134 35.1% 2'134 35.1%
## 2 SAGE 1'191 19.6% 3'325 54.7%
## 3 Inderscience 470 7.7% 3'795 62.4%
## 4 Cambridge University Press 398 6.5% 4'193 69.0%
## 5 Emerald 370 6.1% 4'563 75.0%
## 6 Brill 279 4.6% 4'842 79.6%
## 7 MDPI 274 4.5% 5'116 84.1%
## 8 Hindawi 220 3.6% 5'336 87.8%
## 9 IGI Global 220 3.6% 5'556 91.4%
## 10 Pleiades 115 1.9% 5'671 93.3%
## 11 Karger 99 1.6% 5'770 94.9%
## 12 Frontiers Media 92 1.5% 5'862 96.4%
## ... etc.
## [list output truncated]
## ------------------------------------------------------------------------------
## 3 - N_SUBJECTS (numeric)
##
## length n NAs unique 0s mean meanCI'
## 6'080 3'612 2'468 4 0 1.95 1.92
## 59.4% 40.6% 0.0% 1.98
##
## .05 .10 .25 median .75 .90 .95
## 1.00 1.00 1.00 2.00 3.00 3.00 3.00
##
## range sd vcoef mad IQR skew kurt
## 3.00 0.87 0.45 1.48 2.00 0.35 -1.04
##
##
## level freq perc cumfreq cumperc
## 1 1 1'363 37.7% 1'363 37.7%
## 2 2 1'166 32.3% 2'529 70.0%
## 3 3 984 27.2% 3'513 97.3%
## 4 4 99 2.7% 3'612 100.0%
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 4 - IND_OPEN (logical - dichotomous)
##
## length n NAs unique
## 6'080 4'791 1'289 2
## 78.8% 21.2%
##
## freq perc lci.95 uci.95'
## FALSE 4'000 83.5% 82.4% 84.5%
## TRUE 791 16.5% 15.5% 17.6%
##
## ' 95%-CI (Wilson)
# Univariate descriptions of the editor-level variables.
editors_analysis.tbl %>%
  Desc(
    formula = ~ IND_MALE + LS_COUNTRY + FAC_ROLE + IND_OPEN,
    data = .
  )
## ------------------------------------------------------------------------------
## .$IND_MALE (logical)
##
## length n NAs unique
## 478'562 314'469 164'093 2
## 65.7% 34.3%
##
## freq perc lci.95 uci.95'
## FALSE 100'097 31.8% 31.7% 32.0%
## TRUE 214'372 68.2% 68.0% 68.3%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## .$LS_COUNTRY (character)
##
## length n NAs unique levels dupes
## 478'562 478'562 0 191 191 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 US 133'814 28.0% 133'814 28.0%
## 2 GB 39'538 8.3% 173'352 36.2%
## 3 IT 31'812 6.6% 205'164 42.9%
## 4 CN 27'418 5.7% 232'582 48.6%
## 5 20'778 4.3% 253'360 52.9%
## 6 DE 19'935 4.2% 273'295 57.1%
## 7 AU 17'841 3.7% 291'136 60.8%
## 8 FR 16'468 3.4% 307'604 64.3%
## 9 ES 16'154 3.4% 323'758 67.7%
## 10 CA 15'782 3.3% 339'540 71.0%
## 11 JP 11'786 2.5% 351'326 73.4%
## 12 IN 9'928 2.1% 361'254 75.5%
## ... etc.
## [list output truncated]
## ------------------------------------------------------------------------------
## .$FAC_ROLE (ordered)
##
## length n NAs unique levels dupes
## 478'562 469'172 9'390 3 3 y
## 98.0% 2.0%
##
## level freq perc cumfreq cumperc
## 1 review 317'646 67.7% 317'646 67.7%
## 2 editor 145'296 31.0% 462'942 98.7%
## 3 chief 6'230 1.3% 469'172 100.0%
## ------------------------------------------------------------------------------
## .$IND_OPEN (logical)
##
## length n NAs unique
## 478'562 430'553 48'009 2
## 90.0% 10.0%
##
## freq perc lci.95 uci.95'
## FALSE 197'612 45.9% 45.7% 46.0%
## TRUE 232'941 54.1% 54.0% 54.3%
##
## ' 95%-CI (Wilson)
# Country, role and imputed gender, each cross-tabulated against IND_OPEN.
editors_analysis.tbl %>%
  Desc(
    formula = LS_COUNTRY + FAC_ROLE + IND_MALE ~ IND_OPEN,
    data = .
  )
## ------------------------------------------------------------------------------
## LS_COUNTRY ~ IND_OPEN (.)
##
## Summary:
## n: 430'553, rows: 191, columns: 2
##
## Pearson's Chi-squared test:
## X-squared = NaN, df = 190, p-value = NA
## Log likelihood ratio (G-test) test of independence:
## G = 27789, X-squared df = 190, p-value < 2.2e-16
## Mantel-Haenszel Chi-squared:
## X-squared = 970.61, df = 1, p-value < 2.2e-16
##
## Warning message:
## Exp. counts < 5: Chi-squared approx. may be incorrect!!
##
##
## Phi-Coefficient NaN
## Contingency Coeff. NaN
## Cramer's V NaN
##
##
## IND_OPEN FALSE TRUE Sum
## LS_COUNTRY
## freq 12'023 3'385 15'408
## perc 2.8% 0.8% 3.6%
## p.row 78.0% 22.0% .
## p.col 6.1% 1.5% .
##
## AD freq 2 0 2
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## AE freq 471 303 774
## perc 0.1% 0.1% 0.2%
## p.row 60.9% 39.1% .
## p.col 0.2% 0.1% .
##
## AF freq 6 0 6
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## AL freq 21 24 45
## perc 0.0% 0.0% 0.0%
## p.row 46.7% 53.3% .
## p.col 0.0% 0.0% .
##
## AM freq 15 12 27
## perc 0.0% 0.0% 0.0%
## p.row 55.6% 44.4% .
## p.col 0.0% 0.0% .
##
## AN freq 0 2 2
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## AO freq 3 0 3
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## AR freq 486 1'119 1'605
## perc 0.1% 0.3% 0.4%
## p.row 30.3% 69.7% .
## p.col 0.2% 0.5% .
##
## AS freq 0 1 1
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## AT freq 1'290 1'825 3'115
## perc 0.3% 0.4% 0.7%
## p.row 41.4% 58.6% .
## p.col 0.7% 0.8% .
##
## AU freq 8'179 8'196 16'375
## perc 1.9% 1.9% 3.8%
## p.row 49.9% 50.1% .
## p.col 4.1% 3.5% .
##
## AZ freq 10 3 13
## perc 0.0% 0.0% 0.0%
## p.row 76.9% 23.1% .
## p.col 0.0% 0.0% .
##
## BA freq 0 0 0
## perc 0.0% 0.0% 0.0%
## p.row NA NA .
## p.col 0.0% 0.0% .
##
## BB freq 13 6 19
## perc 0.0% 0.0% 0.0%
## p.row 68.4% 31.6% .
## p.col 0.0% 0.0% .
##
## BD freq 102 167 269
## perc 0.0% 0.0% 0.1%
## p.row 37.9% 62.1% .
## p.col 0.1% 0.1% .
##
## BE freq 1'618 2'141 3'759
## perc 0.4% 0.5% 0.9%
## p.row 43.0% 57.0% .
## p.col 0.8% 0.9% .
##
## BF freq 8 14 22
## perc 0.0% 0.0% 0.0%
## p.row 36.4% 63.6% .
## p.col 0.0% 0.0% .
##
## BG freq 131 167 298
## perc 0.0% 0.0% 0.1%
## p.row 44.0% 56.0% .
## p.col 0.1% 0.1% .
##
## BH freq 62 30 92
## perc 0.0% 0.0% 0.0%
## p.row 67.4% 32.6% .
## p.col 0.0% 0.0% .
##
## BJ freq 5 2 7
## perc 0.0% 0.0% 0.0%
## p.row 71.4% 28.6% .
## p.col 0.0% 0.0% .
##
## BM freq 2 4 6
## perc 0.0% 0.0% 0.0%
## p.row 33.3% 66.7% .
## p.col 0.0% 0.0% .
##
## BN freq 3 17 20
## perc 0.0% 0.0% 0.0%
## p.row 15.0% 85.0% .
## p.col 0.0% 0.0% .
##
## BO freq 3 6 9
## perc 0.0% 0.0% 0.0%
## p.row 33.3% 66.7% .
## p.col 0.0% 0.0% .
##
## BR freq 1'955 5'006 6'961
## perc 0.5% 1.2% 1.6%
## p.row 28.1% 71.9% .
## p.col 1.0% 2.1% .
##
## BS freq 3 2 5
## perc 0.0% 0.0% 0.0%
## p.row 60.0% 40.0% .
## p.col 0.0% 0.0% .
##
## BT freq 5 1 6
## perc 0.0% 0.0% 0.0%
## p.row 83.3% 16.7% .
## p.col 0.0% 0.0% .
##
## BW freq 22 22 44
## perc 0.0% 0.0% 0.0%
## p.row 50.0% 50.0% .
## p.col 0.0% 0.0% .
##
## BY freq 30 16 46
## perc 0.0% 0.0% 0.0%
## p.row 65.2% 34.8% .
## p.col 0.0% 0.0% .
##
## BZ freq 1 1 2
## perc 0.0% 0.0% 0.0%
## p.row 50.0% 50.0% .
## p.col 0.0% 0.0% .
##
## CA freq 7'170 7'042 14'212
## perc 1.7% 1.6% 3.3%
## p.row 50.5% 49.5% .
## p.col 3.6% 3.0% .
##
## CH freq 2'025 3'681 5'706
## perc 0.5% 0.9% 1.3%
## p.row 35.5% 64.5% .
## p.col 1.0% 1.6% .
##
## CI freq 1 0 1
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## CL freq 368 932 1'300
## perc 0.1% 0.2% 0.3%
## p.row 28.3% 71.7% .
## p.col 0.2% 0.4% .
##
## CM freq 25 41 66
## perc 0.0% 0.0% 0.0%
## p.row 37.9% 62.1% .
## p.col 0.0% 0.0% .
##
## CN freq 9'212 15'194 24'406
## perc 2.1% 3.5% 5.7%
## p.row 37.7% 62.3% .
## p.col 4.7% 6.5% .
##
## CO freq 181 322 503
## perc 0.0% 0.1% 0.1%
## p.row 36.0% 64.0% .
## p.col 0.1% 0.1% .
##
## CR freq 36 40 76
## perc 0.0% 0.0% 0.0%
## p.row 47.4% 52.6% .
## p.col 0.0% 0.0% .
##
## CU freq 20 47 67
## perc 0.0% 0.0% 0.0%
## p.row 29.9% 70.1% .
## p.col 0.0% 0.0% .
##
## CY freq 245 242 487
## perc 0.1% 0.1% 0.1%
## p.row 50.3% 49.7% .
## p.col 0.1% 0.1% .
##
## CZ freq 360 264 624
## perc 0.1% 0.1% 0.1%
## p.row 57.7% 42.3% .
## p.col 0.2% 0.1% .
##
## DE freq 6'470 11'868 18'338
## perc 1.5% 2.8% 4.3%
## p.row 35.3% 64.7% .
## p.col 3.3% 5.1% .
##
## DK freq 1'483 1'630 3'113
## perc 0.3% 0.4% 0.7%
## p.row 47.6% 52.4% .
## p.col 0.8% 0.7% .
##
## DM freq 0 1 1
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## DO freq 3 10 13
## perc 0.0% 0.0% 0.0%
## p.row 23.1% 76.9% .
## p.col 0.0% 0.0% .
##
## DZ freq 124 39 163
## perc 0.0% 0.0% 0.0%
## p.row 76.1% 23.9% .
## p.col 0.1% 0.0% .
##
## EC freq 32 53 85
## perc 0.0% 0.0% 0.0%
## p.row 37.6% 62.4% .
## p.col 0.0% 0.0% .
##
## EE freq 112 134 246
## perc 0.0% 0.0% 0.1%
## p.row 45.5% 54.5% .
## p.col 0.1% 0.1% .
##
## EG freq 368 536 904
## perc 0.1% 0.1% 0.2%
## p.row 40.7% 59.3% .
## p.col 0.2% 0.2% .
##
## ER freq 1 0 1
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## ES freq 3'648 10'213 13'861
## perc 0.8% 2.4% 3.2%
## p.row 26.3% 73.7% .
## p.col 1.8% 4.4% .
##
## ET freq 40 82 122
## perc 0.0% 0.0% 0.0%
## p.row 32.8% 67.2% .
## p.col 0.0% 0.0% .
##
## FI freq 1'397 1'191 2'588
## perc 0.3% 0.3% 0.6%
## p.row 54.0% 46.0% .
## p.col 0.7% 0.5% .
##
## FJ freq 25 19 44
## perc 0.0% 0.0% 0.0%
## p.row 56.8% 43.2% .
## p.col 0.0% 0.0% .
##
## FK freq 0 2 2
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## FO freq 4 2 6
## perc 0.0% 0.0% 0.0%
## p.row 66.7% 33.3% .
## p.col 0.0% 0.0% .
##
## FR freq 5'025 9'062 14'087
## perc 1.2% 2.1% 3.3%
## p.row 35.7% 64.3% .
## p.col 2.5% 3.9% .
##
## GA freq 2 5 7
## perc 0.0% 0.0% 0.0%
## p.row 28.6% 71.4% .
## p.col 0.0% 0.0% .
##
## GB freq 20'031 15'808 35'839
## perc 4.7% 3.7% 8.3%
## p.row 55.9% 44.1% .
## p.col 10.1% 6.8% .
##
## GD freq 3 5 8
## perc 0.0% 0.0% 0.0%
## p.row 37.5% 62.5% .
## p.col 0.0% 0.0% .
##
## GE freq 44 27 71
## perc 0.0% 0.0% 0.0%
## p.row 62.0% 38.0% .
## p.col 0.0% 0.0% .
##
## GF freq 1 5 6
## perc 0.0% 0.0% 0.0%
## p.row 16.7% 83.3% .
## p.col 0.0% 0.0% .
##
## GH freq 133 99 232
## perc 0.0% 0.0% 0.1%
## p.row 57.3% 42.7% .
## p.col 0.1% 0.0% .
##
## GL freq 0 4 4
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## GM freq 1 2 3
## perc 0.0% 0.0% 0.0%
## p.row 33.3% 66.7% .
## p.col 0.0% 0.0% .
##
## GN freq 6 2 8
## perc 0.0% 0.0% 0.0%
## p.row 75.0% 25.0% .
## p.col 0.0% 0.0% .
##
## GP freq 0 0 0
## perc 0.0% 0.0% 0.0%
## p.row NA NA .
## p.col 0.0% 0.0% .
##
## GQ freq 0 2 2
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## GR freq 1'539 2'543 4'082
## perc 0.4% 0.6% 0.9%
## p.row 37.7% 62.3% .
## p.col 0.8% 1.1% .
##
## GT freq 7 5 12
## perc 0.0% 0.0% 0.0%
## p.row 58.3% 41.7% .
## p.col 0.0% 0.0% .
##
## GU freq 2 1 3
## perc 0.0% 0.0% 0.0%
## p.row 66.7% 33.3% .
## p.col 0.0% 0.0% .
##
## GW freq 1 0 1
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## GY freq 2 1 3
## perc 0.0% 0.0% 0.0%
## p.row 66.7% 33.3% .
## p.col 0.0% 0.0% .
##
## HK freq 1'295 1'195 2'490
## perc 0.3% 0.3% 0.6%
## p.row 52.0% 48.0% .
## p.col 0.7% 0.5% .
##
## HN freq 3 2 5
## perc 0.0% 0.0% 0.0%
## p.row 60.0% 40.0% .
## p.col 0.0% 0.0% .
##
## HR freq 187 257 444
## perc 0.0% 0.1% 0.1%
## p.row 42.1% 57.9% .
## p.col 0.1% 0.1% .
##
## HT freq 3 2 5
## perc 0.0% 0.0% 0.0%
## p.row 60.0% 40.0% .
## p.col 0.0% 0.0% .
##
## HU freq 472 742 1'214
## perc 0.1% 0.2% 0.3%
## p.row 38.9% 61.1% .
## p.col 0.2% 0.3% .
##
## ID freq 167 136 303
## perc 0.0% 0.0% 0.1%
## p.row 55.1% 44.9% .
## p.col 0.1% 0.1% .
##
## IE freq 999 1'116 2'115
## perc 0.2% 0.3% 0.5%
## p.row 47.2% 52.8% .
## p.col 0.5% 0.5% .
##
## IL freq 1'254 1'815 3'069
## perc 0.3% 0.4% 0.7%
## p.row 40.9% 59.1% .
## p.col 0.6% 0.8% .
##
## IN freq 4'984 4'137 9'121
## perc 1.2% 1.0% 2.1%
## p.row 54.6% 45.4% .
## p.col 2.5% 1.8% .
##
## IQ freq 69 19 88
## perc 0.0% 0.0% 0.0%
## p.row 78.4% 21.6% .
## p.col 0.0% 0.0% .
##
## IR freq 260 610 870
## perc 0.1% 0.1% 0.2%
## p.row 29.9% 70.1% .
## p.col 0.1% 0.3% .
##
## IS freq 68 58 126
## perc 0.0% 0.0% 0.0%
## p.row 54.0% 46.0% .
## p.col 0.0% 0.0% .
##
## IT freq 6'902 23'476 30'378
## perc 1.6% 5.5% 7.1%
## p.row 22.7% 77.3% .
## p.col 3.5% 10.1% .
##
## JE freq 4 3 7
## perc 0.0% 0.0% 0.0%
## p.row 57.1% 42.9% .
## p.col 0.0% 0.0% .
##
## JM freq 17 12 29
## perc 0.0% 0.0% 0.0%
## p.row 58.6% 41.4% .
## p.col 0.0% 0.0% .
##
## JO freq 132 76 208
## perc 0.0% 0.0% 0.0%
## p.row 63.5% 36.5% .
## p.col 0.1% 0.0% .
##
## JP freq 4'006 6'632 10'638
## perc 0.9% 1.5% 2.5%
## p.row 37.7% 62.3% .
## p.col 2.0% 2.8% .
##
## KE freq 154 127 281
## perc 0.0% 0.0% 0.1%
## p.row 54.8% 45.2% .
## p.col 0.1% 0.1% .
##
## KH freq 1 11 12
## perc 0.0% 0.0% 0.0%
## p.row 8.3% 91.7% .
## p.col 0.0% 0.0% .
##
## KI freq 0 3 3
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## KP freq 1 0 1
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## KR freq 1'708 3'068 4'776
## perc 0.4% 0.7% 1.1%
## p.row 35.8% 64.2% .
## p.col 0.9% 1.3% .
##
## KW freq 97 94 191
## perc 0.0% 0.0% 0.0%
## p.row 50.8% 49.2% .
## p.col 0.0% 0.0% .
##
## KZ freq 52 36 88
## perc 0.0% 0.0% 0.0%
## p.row 59.1% 40.9% .
## p.col 0.0% 0.0% .
##
## LA freq 0 2 2
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## LB freq 134 180 314
## perc 0.0% 0.0% 0.1%
## p.row 42.7% 57.3% .
## p.col 0.1% 0.1% .
##
## LC freq 0 0 0
## perc 0.0% 0.0% 0.0%
## p.row NA NA .
## p.col 0.0% 0.0% .
##
## LI freq 3 5 8
## perc 0.0% 0.0% 0.0%
## p.row 37.5% 62.5% .
## p.col 0.0% 0.0% .
##
## LK freq 93 65 158
## perc 0.0% 0.0% 0.0%
## p.row 58.9% 41.1% .
## p.col 0.0% 0.0% .
##
## LR freq 1 1 2
## perc 0.0% 0.0% 0.0%
## p.row 50.0% 50.0% .
## p.col 0.0% 0.0% .
##
## LS freq 0 1 1
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## LT freq 86 139 225
## perc 0.0% 0.0% 0.1%
## p.row 38.2% 61.8% .
## p.col 0.0% 0.1% .
##
## LU freq 79 145 224
## perc 0.0% 0.0% 0.1%
## p.row 35.3% 64.7% .
## p.col 0.0% 0.1% .
##
## LV freq 32 50 82
## perc 0.0% 0.0% 0.0%
## p.row 39.0% 61.0% .
## p.col 0.0% 0.0% .
##
## LY freq 11 5 16
## perc 0.0% 0.0% 0.0%
## p.row 68.8% 31.2% .
## p.col 0.0% 0.0% .
##
## MA freq 122 100 222
## perc 0.0% 0.0% 0.1%
## p.row 55.0% 45.0% .
## p.col 0.1% 0.0% .
##
## MC freq 12 15 27
## perc 0.0% 0.0% 0.0%
## p.row 44.4% 55.6% .
## p.col 0.0% 0.0% .
##
## MD freq 3 7 10
## perc 0.0% 0.0% 0.0%
## p.row 30.0% 70.0% .
## p.col 0.0% 0.0% .
##
## ME freq 10 11 21
## perc 0.0% 0.0% 0.0%
## p.row 47.6% 52.4% .
## p.col 0.0% 0.0% .
##
## MG freq 1 3 4
## perc 0.0% 0.0% 0.0%
## p.row 25.0% 75.0% .
## p.col 0.0% 0.0% .
##
## MK freq 7 5 12
## perc 0.0% 0.0% 0.0%
## p.row 58.3% 41.7% .
## p.col 0.0% 0.0% .
##
## ML freq 4 5 9
## perc 0.0% 0.0% 0.0%
## p.row 44.4% 55.6% .
## p.col 0.0% 0.0% .
##
## MM freq 3 4 7
## perc 0.0% 0.0% 0.0%
## p.row 42.9% 57.1% .
## p.col 0.0% 0.0% .
##
## MN freq 10 2 12
## perc 0.0% 0.0% 0.0%
## p.row 83.3% 16.7% .
## p.col 0.0% 0.0% .
##
## MO freq 79 37 116
## perc 0.0% 0.0% 0.0%
## p.row 68.1% 31.9% .
## p.col 0.0% 0.0% .
##
## MQ freq 0 0 0
## perc 0.0% 0.0% 0.0%
## p.row NA NA .
## p.col 0.0% 0.0% .
##
## MR freq 0 1 1
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## MT freq 50 57 107
## perc 0.0% 0.0% 0.0%
## p.row 46.7% 53.3% .
## p.col 0.0% 0.0% .
##
## MU freq 19 15 34
## perc 0.0% 0.0% 0.0%
## p.row 55.9% 44.1% .
## p.col 0.0% 0.0% .
##
## MV freq 1 0 1
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## MW freq 8 14 22
## perc 0.0% 0.0% 0.0%
## p.row 36.4% 63.6% .
## p.col 0.0% 0.0% .
##
## MX freq 587 1'620 2'207
## perc 0.1% 0.4% 0.5%
## p.row 26.6% 73.4% .
## p.col 0.3% 0.7% .
##
## MY freq 828 749 1'577
## perc 0.2% 0.2% 0.4%
## p.row 52.5% 47.5% .
## p.col 0.4% 0.3% .
##
## MZ freq 6 4 10
## perc 0.0% 0.0% 0.0%
## p.row 60.0% 40.0% .
## p.col 0.0% 0.0% .
##
## NA freq 9 10 19
## perc 0.0% 0.0% 0.0%
## p.row 47.4% 52.6% .
## p.col 0.0% 0.0% .
##
## NC freq 0 3 3
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## NE freq 1 3 4
## perc 0.0% 0.0% 0.0%
## p.row 25.0% 75.0% .
## p.col 0.0% 0.0% .
##
## NG freq 191 210 401
## perc 0.0% 0.0% 0.1%
## p.row 47.6% 52.4% .
## p.col 0.1% 0.1% .
##
## NI freq 5 4 9
## perc 0.0% 0.0% 0.0%
## p.row 55.6% 44.4% .
## p.col 0.0% 0.0% .
##
## NL freq 3'652 3'853 7'505
## perc 0.8% 0.9% 1.7%
## p.row 48.7% 51.3% .
## p.col 1.8% 1.7% .
##
## NO freq 1'058 1'329 2'387
## perc 0.2% 0.3% 0.6%
## p.row 44.3% 55.7% .
## p.col 0.5% 0.6% .
##
## NP freq 34 36 70
## perc 0.0% 0.0% 0.0%
## p.row 48.6% 51.4% .
## p.col 0.0% 0.0% .
##
## NZ freq 1'698 1'145 2'843
## perc 0.4% 0.3% 0.7%
## p.row 59.7% 40.3% .
## p.col 0.9% 0.5% .
##
## OM freq 82 57 139
## perc 0.0% 0.0% 0.0%
## p.row 59.0% 41.0% .
## p.col 0.0% 0.0% .
##
## PA freq 9 19 28
## perc 0.0% 0.0% 0.0%
## p.row 32.1% 67.9% .
## p.col 0.0% 0.0% .
##
## PE freq 38 96 134
## perc 0.0% 0.0% 0.0%
## p.row 28.4% 71.6% .
## p.col 0.0% 0.0% .
##
## PF freq 2 4 6
## perc 0.0% 0.0% 0.0%
## p.row 33.3% 66.7% .
## p.col 0.0% 0.0% .
##
## PH freq 116 81 197
## perc 0.0% 0.0% 0.0%
## p.row 58.9% 41.1% .
## p.col 0.1% 0.0% .
##
## PK freq 300 497 797
## perc 0.1% 0.1% 0.2%
## p.row 37.6% 62.4% .
## p.col 0.2% 0.2% .
##
## PL freq 964 1'783 2'747
## perc 0.2% 0.4% 0.6%
## p.row 35.1% 64.9% .
## p.col 0.5% 0.8% .
##
## PR freq 30 49 79
## perc 0.0% 0.0% 0.0%
## p.row 38.0% 62.0% .
## p.col 0.0% 0.0% .
##
## PT freq 1'336 3'081 4'417
## perc 0.3% 0.7% 1.0%
## p.row 30.2% 69.8% .
## p.col 0.7% 1.3% .
##
## PY freq 8 12 20
## perc 0.0% 0.0% 0.0%
## p.row 40.0% 60.0% .
## p.col 0.0% 0.0% .
##
## QA freq 156 238 394
## perc 0.0% 0.1% 0.1%
## p.row 39.6% 60.4% .
## p.col 0.1% 0.1% .
##
## RE freq 3 2 5
## perc 0.0% 0.0% 0.0%
## p.row 60.0% 40.0% .
## p.col 0.0% 0.0% .
##
## RO freq 387 539 926
## perc 0.1% 0.1% 0.2%
## p.row 41.8% 58.2% .
## p.col 0.2% 0.2% .
##
## RS freq 187 349 536
## perc 0.0% 0.1% 0.1%
## p.row 34.9% 65.1% .
## p.col 0.1% 0.1% .
##
## RU freq 2'618 1'006 3'624
## perc 0.6% 0.2% 0.8%
## p.row 72.2% 27.8% .
## p.col 1.3% 0.4% .
##
## RW freq 7 7 14
## perc 0.0% 0.0% 0.0%
## p.row 50.0% 50.0% .
## p.col 0.0% 0.0% .
##
## SA freq 443 703 1'146
## perc 0.1% 0.2% 0.3%
## p.row 38.7% 61.3% .
## p.col 0.2% 0.3% .
##
## SD freq 13 11 24
## perc 0.0% 0.0% 0.0%
## p.row 54.2% 45.8% .
## p.col 0.0% 0.0% .
##
## SE freq 2'125 2'439 4'564
## perc 0.5% 0.6% 1.1%
## p.row 46.6% 53.4% .
## p.col 1.1% 1.0% .
##
## SG freq 1'400 1'464 2'864
## perc 0.3% 0.3% 0.7%
## p.row 48.9% 51.1% .
## p.col 0.7% 0.6% .
##
## SH freq 0 1 1
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## SI freq 316 382 698
## perc 0.1% 0.1% 0.2%
## p.row 45.3% 54.7% .
## p.col 0.2% 0.2% .
##
## SK freq 115 174 289
## perc 0.0% 0.0% 0.1%
## p.row 39.8% 60.2% .
## p.col 0.1% 0.1% .
##
## SL freq 2 3 5
## perc 0.0% 0.0% 0.0%
## p.row 40.0% 60.0% .
## p.col 0.0% 0.0% .
##
## SM freq 0 3 3
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## SN freq 25 9 34
## perc 0.0% 0.0% 0.0%
## p.row 73.5% 26.5% .
## p.col 0.0% 0.0% .
##
## SO freq 1 0 1
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## SS freq 0 1 1
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## SV freq 1 1 2
## perc 0.0% 0.0% 0.0%
## p.row 50.0% 50.0% .
## p.col 0.0% 0.0% .
##
## SY freq 3 13 16
## perc 0.0% 0.0% 0.0%
## p.row 18.8% 81.2% .
## p.col 0.0% 0.0% .
##
## SZ freq 1 3 4
## perc 0.0% 0.0% 0.0%
## p.row 25.0% 75.0% .
## p.col 0.0% 0.0% .
##
## TD freq 1 0 1
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## TG freq 0 2 2
## perc 0.0% 0.0% 0.0%
## p.row 0.0% 100.0% .
## p.col 0.0% 0.0% .
##
## TH freq 389 367 756
## perc 0.1% 0.1% 0.2%
## p.row 51.5% 48.5% .
## p.col 0.2% 0.2% .
##
## TJ freq 1 1 2
## perc 0.0% 0.0% 0.0%
## p.row 50.0% 50.0% .
## p.col 0.0% 0.0% .
##
## TM freq 6 1 7
## perc 0.0% 0.0% 0.0%
## p.row 85.7% 14.3% .
## p.col 0.0% 0.0% .
##
## TN freq 134 109 243
## perc 0.0% 0.0% 0.1%
## p.row 55.1% 44.9% .
## p.col 0.1% 0.0% .
##
## TR freq 1'492 1'396 2'888
## perc 0.3% 0.3% 0.7%
## p.row 51.7% 48.3% .
## p.col 0.8% 0.6% .
##
## TT freq 16 14 30
## perc 0.0% 0.0% 0.0%
## p.row 53.3% 46.7% .
## p.col 0.0% 0.0% .
##
## TW freq 1'429 2'146 3'575
## perc 0.3% 0.5% 0.8%
## p.row 40.0% 60.0% .
## p.col 0.7% 0.9% .
##
## TZ freq 22 40 62
## perc 0.0% 0.0% 0.0%
## p.row 35.5% 64.5% .
## p.col 0.0% 0.0% .
##
## UA freq 98 94 192
## perc 0.0% 0.0% 0.0%
## p.row 51.0% 49.0% .
## p.col 0.0% 0.0% .
##
## UG freq 34 62 96
## perc 0.0% 0.0% 0.0%
## p.row 35.4% 64.6% .
## p.col 0.0% 0.0% .
##
## UK freq 1 0 1
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## US freq 63'535 57'402 120'937
## perc 14.8% 13.3% 28.1%
## p.row 52.5% 47.5% .
## p.col 32.2% 24.6% .
##
## UY freq 83 126 209
## perc 0.0% 0.0% 0.0%
## p.row 39.7% 60.3% .
## p.col 0.0% 0.1% .
##
## UZ freq 9 3 12
## perc 0.0% 0.0% 0.0%
## p.row 75.0% 25.0% .
## p.col 0.0% 0.0% .
##
## VE freq 24 34 58
## perc 0.0% 0.0% 0.0%
## p.row 41.4% 58.6% .
## p.col 0.0% 0.0% .
##
## VN freq 148 128 276
## perc 0.0% 0.0% 0.1%
## p.row 53.6% 46.4% .
## p.col 0.1% 0.1% .
##
## WS freq 1 0 1
## perc 0.0% 0.0% 0.0%
## p.row 100.0% 0.0% .
## p.col 0.0% 0.0% .
##
## XK freq 8 1 9
## perc 0.0% 0.0% 0.0%
## p.row 88.9% 11.1% .
## p.col 0.0% 0.0% .
##
## YE freq 6 6 12
## perc 0.0% 0.0% 0.0%
## p.row 50.0% 50.0% .
## p.col 0.0% 0.0% .
##
## ZA freq 1'152 854 2'006
## perc 0.3% 0.2% 0.5%
## p.row 57.4% 42.6% .
## p.col 0.6% 0.4% .
##
## ZM freq 15 25 40
## perc 0.0% 0.0% 0.0%
## p.row 37.5% 62.5% .
## p.col 0.0% 0.0% .
##
## ZW freq 32 13 45
## perc 0.0% 0.0% 0.0%
## p.row 71.1% 28.9% .
## p.col 0.0% 0.0% .
##
## Sum freq 197'612 232'941 430'553
## perc 45.9% 54.1% 100.0%
## p.row . . .
## p.col . . .
##
## ------------------------------------------------------------------------------
## FAC_ROLE ~ IND_OPEN (.)
##
## Summary:
## n: 424'264, rows: 3, columns: 2
##
## Pearson's Chi-squared test:
## X-squared = 4915.6, df = 2, p-value < 2.2e-16
## Log likelihood ratio (G-test) test of independence:
## G = 5387.8, X-squared df = 2, p-value < 2.2e-16
## Mantel-Haenszel Chi-squared:
## X-squared = 2676.9, df = 1, p-value < 2.2e-16
##
## Phi-Coefficient 0.108
## Contingency Coeff. 0.107
## Cramer's V 0.108
##
##
## IND_OPEN FALSE TRUE Sum
## FAC_ROLE
##
## review freq 123'769 163'656 287'425
## perc 29.2% 38.6% 67.7%
## p.row 43.1% 56.9% .
## p.col 64.5% 70.4% .
##
## editor freq 63'957 68'368 132'325
## perc 15.1% 16.1% 31.2%
## p.row 48.3% 51.7% .
## p.col 33.3% 29.4% .
##
## chief freq 4'118 396 4'514
## perc 1.0% 0.1% 1.1%
## p.row 91.2% 8.8% .
## p.col 2.1% 0.2% .
##
## Sum freq 191'844 232'420 424'264
## perc 45.2% 54.8% 100.0%
## p.row . . .
## p.col . . .
##
## ------------------------------------------------------------------------------
## IND_MALE ~ IND_OPEN (.)
##
## Summary:
## n: 284'342, rows: 2, columns: 2
##
## Pearson's Chi-squared test (cont. adj):
## X-squared = 0.50722, df = 1, p-value = 0.4763
## Fisher's exact test p-value = 0.4752
## McNemar's chi-squared = 4734.1, df = 1, p-value < 2.2e-16
##
## estimate lwr.ci upr.ci'
##
## odds ratio 1.006 0.990 1.022
## rel. risk (col1) 1.003 0.994 1.013
## rel. risk (col2) 0.998 0.991 1.004
##
##
## Phi-Coefficient 0.001
## Contingency Coeff. 0.001
## Cramer's V 0.001
##
##
## IND_OPEN FALSE TRUE Sum
## IND_MALE
##
## FALSE freq 37'634 53'907 91'541
## perc 13.2% 19.0% 32.2%
## p.row 41.1% 58.9% .
## p.col 32.3% 32.1% .
##
## TRUE freq 78'991 113'810 192'801
## perc 27.8% 40.0% 67.8%
## p.row 41.0% 59.0% .
## p.col 67.7% 67.9% .
##
## Sum freq 116'625 167'717 284'342
## perc 41.0% 59.0% 100.0%
## p.row . . .
## p.col . . .
##
##
## ----------
## ' 95% conf. level
# Open-licence status, role, and their combination (reported as
# IND_OPEN:FAC_ROLE), each cross-tabulated against imputed gender.
editors_analysis.tbl %>%
  Desc(
    formula = IND_OPEN + FAC_ROLE + IND_OPEN %in% FAC_ROLE ~ IND_MALE,
    data = .
  )
## ------------------------------------------------------------------------------
## IND_OPEN ~ IND_MALE (.)
##
## Summary:
## n: 284'342, rows: 2, columns: 2
##
## Pearson's Chi-squared test (cont. adj):
## X-squared = 0.50722, df = 1, p-value = 0.4763
## Fisher's exact test p-value = 0.4752
## McNemar's chi-squared = 4734.1, df = 1, p-value < 2.2e-16
##
## estimate lwr.ci upr.ci'
##
## odds ratio 1.006 0.990 1.022
## rel. risk (col1) 1.004 0.993 1.015
## rel. risk (col2) 0.998 0.993 1.003
##
##
## Phi-Coefficient 0.001
## Contingency Coeff. 0.001
## Cramer's V 0.001
##
##
## IND_MALE FALSE TRUE Sum
## IND_OPEN
##
## FALSE freq 37'634 78'991 116'625
## perc 13.2% 27.8% 41.0%
## p.row 32.3% 67.7% .
## p.col 41.1% 41.0% .
##
## TRUE freq 53'907 113'810 167'717
## perc 19.0% 40.0% 59.0%
## p.row 32.1% 67.9% .
## p.col 58.9% 59.0% .
##
## Sum freq 91'541 192'801 284'342
## perc 32.2% 67.8% 100.0%
## p.row . . .
## p.col . . .
##
##
## ----------
## ' 95% conf. level
## ------------------------------------------------------------------------------
## FAC_ROLE ~ IND_MALE (.)
##
## Summary:
## n: 308'623, rows: 3, columns: 2
##
## Pearson's Chi-squared test:
## X-squared = 320.94, df = 2, p-value < 2.2e-16
## Log likelihood ratio (G-test) test of independence:
## G = 340.35, X-squared df = 2, p-value < 2.2e-16
## Mantel-Haenszel Chi-squared:
## X-squared = 9.7643, df = 1, p-value = 0.001779
##
## Phi-Coefficient 0.032
## Contingency Coeff. 0.032
## Cramer's V 0.032
##
##
## IND_MALE FALSE TRUE Sum
## FAC_ROLE
##
## review freq 65'119 142'002 207'121
## perc 21.1% 46.0% 67.1%
## p.row 31.4% 68.6% .
## p.col 66.3% 67.5% .
##
## editor freq 32'411 65'417 97'828
## perc 10.5% 21.2% 31.7%
## p.row 33.1% 66.9% .
## p.col 33.0% 31.1% .
##
## chief freq 741 2'933 3'674
## perc 0.2% 1.0% 1.2%
## p.row 20.2% 79.8% .
## p.col 0.8% 1.4% .
##
## Sum freq 98'271 210'352 308'623
## perc 31.8% 68.2% 100.0%
## p.row . . .
## p.col . . .
##
## ------------------------------------------------------------------------------
## IND_OPEN:FAC_ROLE ~ IND_MALE (.)
##
## Summary:
## n: 280'402, rows: 6, columns: 2
##
## Pearson's Chi-squared test:
## X-squared = 263.08, df = 5, p-value < 2.2e-16
## Log likelihood ratio (G-test) test of independence:
## G = 278.21, X-squared df = 5, p-value < 2.2e-16
## Mantel-Haenszel Chi-squared:
## X-squared = 9.1472, df = 1, p-value = 0.002491
##
## Phi-Coefficient 0.031
## Contingency Coeff. 0.031
## Cramer's V 0.031
##
##
## IND_MALE FALSE TRUE Sum
## IND_OPEN:FAC_ROLE
##
## FALSE:review freq 22'391 48'416 70'807
## perc 8.0% 17.3% 25.3%
## p.row 31.6% 68.4% .
## p.col 24.8% 25.5% .
##
## TRUE:review freq 37'430 79'909 117'339
## perc 13.3% 28.5% 41.8%
## p.row 31.9% 68.1% .
## p.col 41.5% 42.0% .
##
## FALSE:editor freq 13'595 26'321 39'916
## perc 4.8% 9.4% 14.2%
## p.row 34.1% 65.9% .
## p.col 15.1% 13.8% .
##
## TRUE:editor freq 16'324 33'438 49'762
## perc 5.8% 11.9% 17.7%
## p.row 32.8% 67.2% .
## p.col 18.1% 17.6% .
##
## FALSE:chief freq 457 1'853 2'310
## perc 0.2% 0.7% 0.8%
## p.row 19.8% 80.2% .
## p.col 0.5% 1.0% .
##
## TRUE:chief freq 59 209 268
## perc 0.0% 0.1% 0.1%
## p.row 22.0% 78.0% .
## p.col 0.1% 0.1% .
##
## Sum freq 90'256 190'146 280'402
## perc 32.2% 67.8% 100.0%
## p.row . . .
## p.col . . .
##
# World map of editor counts per country, followed by a univariate summary of
# the journal-board table.

# Country polygons as an sf object.
world.sf <- ne_countries(scale = "medium", returnclass = "sf")

# Todo: track country code non-matches "RE" "UK" "GP" "MQ" "GF" "XK" "AN"
# One row per LS_COUNTRY value with its number of editor records. The key is
# renamed to iso_a2 so it lines up with the join column used below.
# Rows with an empty LS_COUNTRY form their own group and match no polygon.
ctry_totals <- editors_analysis.tbl %>%
  count(LS_COUNTRY, name = "n_editors") %>%
  rename(iso_a2 = LS_COUNTRY)

# Explicit join key: avoids the implicit natural join (and its message) and
# keeps the join stable if the two tables ever share another column name.
world.sf %<>% left_join(ctry_totals, by = "iso_a2")

# Choropleth with a log-transformed fill scale; countries with no matching
# code have n_editors = NA.
ggplot(data = world.sf) +
  geom_sf(aes(fill = n_editors)) +
  scale_fill_viridis_c(option = "E", trans = "log")

# Describe every remaining column after dropping the journal name and the two
# list columns. This was fused onto the end of the plot expression above,
# which made the line unparseable.
journal_board_analysis.tbl %>%
  select(-NM_JOURNAL, -LIST_ROLEGROUP_COUNTRIES, -LIST_SUBJECTS) %>%
  Desc(~., data = .)
## ------------------------------------------------------------------------------
## .$FAC_ROLE (ordered)
##
## length n NAs unique levels dupes
## 15'314 14'191 1'123 3 3 y
## 92.7% 7.3%
##
## level freq perc cumfreq cumperc
## 1 review 5'129 36.1% 5'129 36.1%
## 2 editor 5'454 38.4% 10'583 74.6%
## 3 chief 3'608 25.4% 14'191 100.0%
## ------------------------------------------------------------------------------
## .$CAT_PUBLISHER (character)
##
## length n NAs unique levels dupes
## 15'314 15'314 0 17 17 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 Elsevier 5'814 38.0% 5'814 38.0%
## 2 SAGE 2'890 18.9% 8'704 56.8%
## 3 Inderscience 1'310 8.6% 10'014 65.4%
## 4 Emerald 1'289 8.4% 11'303 73.8%
## 5 Cambridge University Press 879 5.7% 12'182 79.5%
## 6 Brill 629 4.1% 12'811 83.7%
## 7 MDPI 536 3.5% 13'347 87.2%
## 8 IGI Global 406 2.7% 13'753 89.8%
## 9 Pleiades 344 2.2% 14'097 92.1%
## 10 Hindawi 302 2.0% 14'399 94.0%
## 11 Karger 257 1.7% 14'656 95.7%
## 12 John Benjamins 203 1.3% 14'859 97.0%
## ... etc.
## [list output truncated]
## ------------------------------------------------------------------------------
## .$N_SUBJECTS (numeric)
##
## length n NAs unique 0s mean meanCI'
## 15'314 9'339 5'975 4 0 1.95 1.93
## 61.0% 39.0% 0.0% 1.97
##
## .05 .10 .25 median .75 .90 .95
## 1.00 1.00 1.00 2.00 3.00 3.00 3.00
##
## range sd vcoef mad IQR skew kurt
## 3.00 0.87 0.45 1.48 2.00 0.35 -1.04
##
##
## level freq perc cumfreq cumperc
## 1 1 3'541 37.9% 3'541 37.9%
## 2 2 3'000 32.1% 6'541 70.0%
## 3 3 2'544 27.2% 9'085 97.3%
## 4 4 254 2.7% 9'339 100.0%
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## .$IND_OPEN (logical)
##
## length n NAs unique
## 15'314 12'038 3'276 2
## 78.6% 21.4%
##
## freq perc lci.95 uci.95'
## FALSE 10'380 86.2% 85.6% 86.8%
## TRUE 1'658 13.8% 13.2% 14.4%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## .$N_ROLEGROUP_COUNTRIES (integer)
##
## length n NAs unique 0s mean meanCI'
## 15'314 15'314 0 85 0 6.66 6.52
## 100.0% 0.0% 0.0% 6.79
##
## .05 .10 .25 median .75 .90 .95
## 1.00 1.00 1.00 3.00 10.00 17.00 21.00
##
## range sd vcoef mad IQR skew kurt
## 105.00 8.48 1.27 2.97 9.00 3.30 18.65
##
## lowest : 1 (5'122), 2 (1'924), 3 (1'042), 4 (743), 5 (639)
## highest: 89, 90 (2), 91, 100, 106
##
## heap(?): remarkable frequency (33.4%) for the mode(s) (= 1)
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## .$PERCENT_ROLEGROUP_MALE (numeric)
##
## length n NAs unique 0s mean meanCI'
## 15'314 12'210 3'104 986 1'082 0.7005036 0.6951806
## 79.7% 20.3% 7.1% 0.7058267
##
## .05 .10 .25 median .75 .90 .95
## 0.0000000 0.2500000 0.5384615 0.7500000 1.0000000 1.0000000 1.0000000
##
## range sd vcoef mad IQR skew kurt
## 1.0000000 0.3000711 0.4283648 0.3706500 0.4615385 -0.9913193 0.1952624
##
## lowest : 0.0 (1'082), 0.0526316, 0.0666667, 0.0833333 (2), 0.0909091
## highest: 0.9722222, 0.9736842, 0.975, 0.9787234, 1.0 (3'673)
##
## heap(?): remarkable frequency (30.1%) for the mode(s) (= 1)
##
## ' 95%-CI (classic)
# Describe IND_OPEN against each board-level measure over all rows; Desc
# prints one panel per right-hand-side term.
journal_board_analysis.tbl %>%
  Desc(
    IND_OPEN ~ N_ROLEGROUP_COUNTRIES + PERCENT_ROLEGROUP_MALE + N_SUBJECTS,
    data = .
  )
## ------------------------------------------------------------------------------
## IND_OPEN ~ N_ROLEGROUP_COUNTRIES (.)
##
## Summary:
## n pairs: 15'314, valid: 12'038 (78.6%), missings: 3'276 (21.4%), groups: 2
##
##
## FALSE TRUE
## mean 6.253 12.936
## median 3.000 7.000
## sd 6.816 16.347
## IQR 9.000 15.000
## n 10'380 1'658
## np 86.227% 13.773%
## NAs 0 0
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 245.66, df = 1, p-value < 2.2e-16
##
##
## Warning:
## Grouping variable contains 3276 NAs (21.4%).
##
##
##
## Proportions of IND_OPEN in the quantiles of N_ROLEGROUP_COUNTRIES:
##
## 1 (1,3] (3,11] (11,106]
## FALSE 89.7% 90.2% 86.7% 77.5%
## TRUE 10.3% 9.8% 13.3% 22.5%
## ------------------------------------------------------------------------------
## IND_OPEN ~ PERCENT_ROLEGROUP_MALE (.)
##
## Summary:
## n pairs: 15'314, valid: 9'470 (61.8%), missings: 5'844 (38.2%), groups: 2
##
##
## FALSE TRUE
## mean 0.687 0.738
## median 0.750 0.772
## sd 0.308 0.251
## IQR 0.500 0.321
## n 7'927 1'543
## np 83.706% 16.294%
## NAs 2'453 115
## 0s 763 84
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 17.183, df = 1, p-value = 3.395e-05
##
##
## Warning:
## Grouping variable contains 3276 NAs (21.4%).
##
##
##
## Proportions of IND_OPEN in the quantiles of PERCENT_ROLEGROUP_MALE:
##
## [0,0.533] (0.533,0.75] (0.75,1) 1
## FALSE 90.0% 80.6% 76.3% 86.1%
## TRUE 10.0% 19.4% 23.7% 13.9%
## ------------------------------------------------------------------------------
## IND_OPEN ~ N_SUBJECTS (.)
##
## Summary:
## n pairs: 15'314, valid: 9'339 (61.0%), missings: 5'975 (39.0%), groups: 2
##
##
## FALSE TRUE
## mean 1.954 1.885
## median 2.000 2.000
## sd 0.871 0.858
## IQR 2.000 2.000
## n 8'528 811
## np 91.316% 8.684%
## NAs 1'852 847
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 4.6992, df = 1, p-value = 0.03018
##
##
## Warning:
## Grouping variable contains 3276 NAs (21.4%).
##
##
##
## Proportions of IND_OPEN in the quantiles of N_SUBJECTS:
##
## 1 (1,2] (2,3] (3,4]
## FALSE 90.7% 91.0% 92.5% 91.3%
## TRUE 9.3% 9.0% 7.5% 8.7%
# Describe IND_OPEN against N_ROLEGROUP_COUNTRIES, PERCENT_ROLEGROUP_MALE and
# N_SUBJECTS over all rows of journal_board_analysis.tbl.
# NOTE(review): the same unfiltered call appears earlier in this file and the
# output is identical -- confirm whether this repeat is intentional or whether
# a filter/subset was meant to be applied here.
journal_board_analysis.tbl %>%
Desc(IND_OPEN~N_ROLEGROUP_COUNTRIES+PERCENT_ROLEGROUP_MALE+N_SUBJECTS,data=.)## ------------------------------------------------------------------------------
## IND_OPEN ~ N_ROLEGROUP_COUNTRIES (.)
##
## Summary:
## n pairs: 15'314, valid: 12'038 (78.6%), missings: 3'276 (21.4%), groups: 2
##
##
## FALSE TRUE
## mean 6.253 12.936
## median 3.000 7.000
## sd 6.816 16.347
## IQR 9.000 15.000
## n 10'380 1'658
## np 86.227% 13.773%
## NAs 0 0
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 245.66, df = 1, p-value < 2.2e-16
##
##
## Warning:
## Grouping variable contains 3276 NAs (21.4%).
##
##
##
## Proportions of IND_OPEN in the quantiles of N_ROLEGROUP_COUNTRIES:
##
## 1 (1,3] (3,11] (11,106]
## FALSE 89.7% 90.2% 86.7% 77.5%
## TRUE 10.3% 9.8% 13.3% 22.5%
## ------------------------------------------------------------------------------
## IND_OPEN ~ PERCENT_ROLEGROUP_MALE (.)
##
## Summary:
## n pairs: 15'314, valid: 9'470 (61.8%), missings: 5'844 (38.2%), groups: 2
##
##
## FALSE TRUE
## mean 0.687 0.738
## median 0.750 0.772
## sd 0.308 0.251
## IQR 0.500 0.321
## n 7'927 1'543
## np 83.706% 16.294%
## NAs 2'453 115
## 0s 763 84
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 17.183, df = 1, p-value = 3.395e-05
##
##
## Warning:
## Grouping variable contains 3276 NAs (21.4%).
##
##
##
## Proportions of IND_OPEN in the quantiles of PERCENT_ROLEGROUP_MALE:
##
## [0,0.533] (0.533,0.75] (0.75,1) 1
## FALSE 90.0% 80.6% 76.3% 86.1%
## TRUE 10.0% 19.4% 23.7% 13.9%
## ------------------------------------------------------------------------------
## IND_OPEN ~ N_SUBJECTS (.)
##
## Summary:
## n pairs: 15'314, valid: 9'339 (61.0%), missings: 5'975 (39.0%), groups: 2
##
##
## FALSE TRUE
## mean 1.954 1.885
## median 2.000 2.000
## sd 0.871 0.858
## IQR 2.000 2.000
## n 8'528 811
## np 91.316% 8.684%
## NAs 1'852 847
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 4.6992, df = 1, p-value = 0.03018
##
##
## Warning:
## Grouping variable contains 3276 NAs (21.4%).
##
##
##
## Proportions of IND_OPEN in the quantiles of N_SUBJECTS:
##
## 1 (1,2] (2,3] (3,4]
## FALSE 90.7% 91.0% 92.5% 91.3%
## TRUE 9.3% 9.0% 7.5% 8.7%
# Describe IND_OPEN against each board-level measure, using only the rows
# whose FAC_ROLE is "chief".
journal_board_analysis.tbl %>%
  filter(FAC_ROLE == "chief") %>%
  Desc(
    IND_OPEN ~ N_ROLEGROUP_COUNTRIES + PERCENT_ROLEGROUP_MALE + N_SUBJECTS,
    data = .
  )
## ------------------------------------------------------------------------------
## IND_OPEN ~ N_ROLEGROUP_COUNTRIES (.)
##
## Summary:
## n pairs: 3'608, valid: 2'636 (73.1%), missings: 972 (26.9%), groups: 2
##
##
## FALSE TRUE
## mean 1.330 1.389
## median 1.000 1.000
## sd 0.796 0.996
## IQR 0.000 0.000
## n 2'420 216
## np 91.806% 8.194%
## NAs 0 0
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 0.13217, df = 1, p-value = 0.7162
##
##
## Warning:
## Grouping variable contains 972 NAs (26.9%).
##
##
##
## Proportions of IND_OPEN in the quantiles of N_ROLEGROUP_COUNTRIES:
##
## 1 (1,10]
## FALSE 91.9% 91.5%
## TRUE 8.1% 8.5%
## ------------------------------------------------------------------------------
## IND_OPEN ~ PERCENT_ROLEGROUP_MALE (.)
##
## Summary:
## n pairs: 3'608, valid: 1'625 (45.0%), missings: 1'983 (55.0%), groups: 2
##
##
## FALSE TRUE
## mean 0.803 0.786
## median 1.000 1.000
## sd 0.362 0.381
## IQR 0.250 0.271
## n 1'453 172
## np 89.415% 10.585%
## NAs 967 44
## 0s 212 29
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 0.16665, df = 1, p-value = 0.6831
##
##
## Warning:
## Grouping variable contains 972 NAs (26.9%).
##
##
##
## Proportions of IND_OPEN in the quantiles of PERCENT_ROLEGROUP_MALE:
##
## [0,0.75] (0.75,1) 1
## FALSE 89.1% 87.5% 89.6%
## TRUE 10.9% 12.5% 10.4%
## ------------------------------------------------------------------------------
## IND_OPEN ~ N_SUBJECTS (.)
##
## Summary:
## n pairs: 3'608, valid: 2'184 (60.5%), missings: 1'424 (39.5%), groups: 2
##
##
## FALSE TRUE
## mean 1.963 1.856
## median 2.000 2.000
## sd 0.873 0.849
## IQR 2.000 1.000
## n 2'059 125
## np 94.277% 5.723%
## NAs 361 91
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 1.7759, df = 1, p-value = 0.1827
##
##
## Warning:
## Grouping variable contains 972 NAs (26.9%).
##
##
##
## Proportions of IND_OPEN in the quantiles of N_SUBJECTS:
##
## 1 (1,2] (2,3] (3,4]
## FALSE 93.7% 93.9% 95.4% 94.7%
## TRUE 6.3% 6.1% 4.6% 5.3%
# Describe IND_OPEN against each board-level measure, using only the rows
# whose FAC_ROLE is "editor".
journal_board_analysis.tbl %>%
  filter(FAC_ROLE == "editor") %>%
  Desc(
    IND_OPEN ~ N_ROLEGROUP_COUNTRIES + PERCENT_ROLEGROUP_MALE + N_SUBJECTS,
    data = .
  )
## ------------------------------------------------------------------------------
## IND_OPEN ~ N_ROLEGROUP_COUNTRIES (.)
##
## Summary:
## n pairs: 5'454, valid: 4'360 (79.9%), missings: 1'094 (20.1%), groups: 2
##
##
## FALSE TRUE
## mean 5.820 9.370
## median 4.000 3.000
## sd 6.014 15.976
## IQR 5.000 7.000
## n 3'736 624
## np 85.688% 14.312%
## NAs 0 0
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 0.85307, df = 1, p-value = 0.3557
##
##
## Warning:
## Grouping variable contains 1094 NAs (20.1%).
##
##
##
## Proportions of IND_OPEN in the quantiles of N_ROLEGROUP_COUNTRIES:
##
## Q1 Q2 Q3 Q4
## FALSE 84.5% 85.8% 88.8% 85.3%
## TRUE 15.5% 14.2% 11.2% 14.7%
## ------------------------------------------------------------------------------
## IND_OPEN ~ PERCENT_ROLEGROUP_MALE (.)
##
## Summary:
## n pairs: 5'454, valid: 3'552 (65.1%), missings: 1'902 (34.9%), groups: 2
##
##
## FALSE TRUE
## mean 0.641 0.723
## median 0.667 0.750
## sd 0.291 0.282
## IQR 0.364 0.429
## n 2'979 573
## np 83.868% 16.132%
## NAs 757 51
## 0s 262 37
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 44.709, df = 1, p-value = 2.286e-11
##
##
## Warning:
## Grouping variable contains 1094 NAs (20.1%).
##
##
##
## Proportions of IND_OPEN in the quantiles of PERCENT_ROLEGROUP_MALE:
##
## Q1 Q2 Q3 Q4
## FALSE 88.4% 84.0% 85.6% 76.4%
## TRUE 11.6% 16.0% 14.4% 23.6%
## ------------------------------------------------------------------------------
## IND_OPEN ~ N_SUBJECTS (.)
##
## Summary:
## n pairs: 5'454, valid: 3'334 (61.1%), missings: 2'120 (38.9%), groups: 2
##
##
## FALSE TRUE
## mean 1.968 1.895
## median 2.000 2.000
## sd 0.874 0.858
## IQR 2.000 2.000
## n 3'028 306
## np 90.822% 9.178%
## NAs 708 318
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 1.9234, df = 1, p-value = 0.1655
##
##
## Warning:
## Grouping variable contains 1094 NAs (20.1%).
##
##
##
## Proportions of IND_OPEN in the quantiles of N_SUBJECTS:
##
## 1 (1,2] (2,3] (3,4]
## FALSE 90.2% 90.5% 92.0% 91.4%
## TRUE 9.8% 9.5% 8.0% 8.6%
# Describe IND_OPEN against each board-level measure, using only the rows
# whose FAC_ROLE is "review".
journal_board_analysis.tbl %>%
  filter(FAC_ROLE == "review") %>%
  Desc(
    IND_OPEN ~ N_ROLEGROUP_COUNTRIES + PERCENT_ROLEGROUP_MALE + N_SUBJECTS,
    data = .
  )
## ------------------------------------------------------------------------------
## IND_OPEN ~ N_ROLEGROUP_COUNTRIES (.)
##
## Summary:
## n pairs: 5'129, valid: 4'155 (81.0%), missings: 974 (19.0%), groups: 2
##
##
## FALSE TRUE
## mean 11.115 20.005
## median 11.000 15.000
## sd 7.308 16.328
## IQR 9.000 14.000
## n 3'399 756
## np 81.805% 18.195%
## NAs 0 0
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 250.4, df = 1, p-value < 2.2e-16
##
##
## Warning:
## Grouping variable contains 974 NAs (19%).
##
##
##
## Proportions of IND_OPEN in the quantiles of N_ROLEGROUP_COUNTRIES:
##
## Q1 Q2 Q3 Q4
## FALSE 92.3% 83.4% 84.4% 63.8%
## TRUE 7.7% 16.6% 15.6% 36.2%
## ------------------------------------------------------------------------------
## IND_OPEN ~ PERCENT_ROLEGROUP_MALE (.)
##
## Summary:
## n pairs: 5'129, valid: 3'533 (68.9%), missings: 1'596 (31.1%), groups: 2
##
##
## FALSE TRUE
## mean 0.705 0.746
## median 0.733 0.761
## sd 0.225 0.145
## IQR 0.274 0.181
## n 2'790 743
## np 78.970% 21.030%
## NAs 609 13
## 0s 76 3
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 10.464, df = 1, p-value = 0.001217
##
##
## Warning:
## Grouping variable contains 974 NAs (19%).
##
##
##
## Proportions of IND_OPEN in the quantiles of PERCENT_ROLEGROUP_MALE:
##
## Q1 Q2 Q3 Q4
## FALSE 88.5% 73.5% 71.6% 82.7%
## TRUE 11.5% 26.5% 28.4% 17.3%
## ------------------------------------------------------------------------------
## IND_OPEN ~ N_SUBJECTS (.)
##
## Summary:
## n pairs: 5'129, valid: 3'116 (60.8%), missings: 2'013 (39.2%), groups: 2
##
##
## FALSE TRUE
## mean 1.944 1.891
## median 2.000 2.000
## sd 0.870 0.851
## IQR 2.000 2.000
## n 2'758 358
## np 88.511% 11.489%
## NAs 641 398
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 1.1105, df = 1, p-value = 0.292
##
##
## Warning:
## Grouping variable contains 974 NAs (19%).
##
##
##
## Proportions of IND_OPEN in the quantiles of N_SUBJECTS:
##
## 1 (1,2] (2,3] (3,4]
## FALSE 88.1% 87.9% 89.7% 89.4%
## TRUE 11.9% 12.1% 10.3% 10.6%
# Least-squares fit of PERCENT_ROLEGROUP_MALE on IND_OPEN, FAC_ROLE and
# N_SUBJECTS. The table is piped in as `data = .`, so the printed Call shows
# `data = .`. FAC_ROLE is an ordered factor, hence the .L / .Q terms in the
# coefficient table; rows with missing values are dropped by lm().
lm.res <- journal_board_analysis.tbl %>%
  lm(
    PERCENT_ROLEGROUP_MALE ~ IND_OPEN + FAC_ROLE + N_SUBJECTS,
    data = .
  )
# TODO: GENDER BY DISCIPLINE
# TODO: Use subject codes
summary(lm.res)
##
## Call:
## lm(formula = PERCENT_ROLEGROUP_MALE ~ IND_OPEN + FAC_ROLE + N_SUBJECTS,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84647 -0.13718 0.05986 0.19609 0.36282
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7133707 0.0085592 83.346 < 2e-16 ***
## IND_OPENTRUE 0.0405883 0.0109962 3.691 0.000225 ***
## FAC_ROLE.L 0.0706779 0.0065860 10.732 < 2e-16 ***
## FAC_ROLE.Q 0.0945218 0.0057947 16.312 < 2e-16 ***
## N_SUBJECTS 0.0009865 0.0039245 0.251 0.801529
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2783 on 6661 degrees of freedom
## (8648 observations deleted due to missingness)
## Multiple R-squared: 0.04761, Adjusted R-squared: 0.04704
## F-statistic: 83.25 on 4 and 6661 DF, p-value: < 2.2e-16
# Diagnostic plots for the fitted model.
plot(lm.res)

# Collect, as symbols, every object in the calling environment whose name ends
# in "_analysis.tbl". The dot is escaped and the pattern anchored so that only
# the literal suffix matches (an unescaped "." matches any character).
explore.ls <- lapply(ls(pattern = "_analysis\\.tbl$"), sym)

library(skimr)

# Print a banner with the table name, then its skim() summary. The loop
# variable stays `i` because skim() labels the summary with the deparsed
# expression ("eval(i)").
for (i in explore.ls) {
  cat("****\n")
  print(i)
  cat("****\n")
  print(skim(eval(i)))
}
## ****
## editors_analysis.tbl
## ****
## ── Data Summary ────────────────────────
## Values
## Name eval(i)
## Number of rows 478562
## Number of columns 7
## _______________________
## Column type frequency:
## character 4
## factor 1
## logical 2
## ________________________
## Group variables None
##
## ── Variable type: character ────────────────────────────────────────────────────
## skim_variable n_missing complete_rate min max empty n_unique whitespace
## 1 NM_JOURNAL 6 1.00 0 146 16 6079 0
## 2 CAT_PUBLISHER 0 1 4 35 0 17 0
## 3 LS_COUNTRY 0 1 0 2 20778 191 0
## 4 LS_SUBJECTS 150584 0.685 2 14 0 1312 0
##
## ── Variable type: factor ───────────────────────────────────────────────────────
## skim_variable n_missing complete_rate ordered n_unique
## 1 FAC_ROLE 9390 0.980 TRUE 3
## top_counts
## 1 rev: 317646, edi: 145296, chi: 6230
##
## ── Variable type: logical ──────────────────────────────────────────────────────
## skim_variable n_missing complete_rate mean count
## 1 IND_MALE 164093 0.657 0.682 TRU: 214372, FAL: 100097
## 2 IND_OPEN 48009 0.900 0.541 TRU: 232941, FAL: 197612
## ****
## journal_analysis.tbl
## ****
## ── Data Summary ────────────────────────
## Values
## Name eval(i)
## Number of rows 6080
## Number of columns 5
## _______________________
## Column type frequency:
## character 1
## factor 1
## list 1
## logical 1
## numeric 1
## ________________________
## Group variables None
##
## ── Variable type: character ────────────────────────────────────────────────────
## skim_variable n_missing complete_rate min max empty n_unique whitespace
## 1 CAT_PUBLISHER 0 1 4 35 0 17 0
##
## ── Variable type: factor ───────────────────────────────────────────────────────
## skim_variable n_missing complete_rate ordered n_unique
## 1 FAC_ROLE 45 0.993 TRUE 3
## top_counts
## 1 rev: 5129, edi: 872, chi: 34
##
## ── Variable type: list ─────────────────────────────────────────────────────────
## skim_variable n_missing complete_rate n_unique min_length max_length
## 1 LIST_SUBJECTS 2468 0.594 1312 1 3
##
## ── Variable type: logical ──────────────────────────────────────────────────────
## skim_variable n_missing complete_rate mean count
## 1 IND_OPEN 1289 0.788 0.165 FAL: 4000, TRU: 791
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75
## 1 N_SUBJECTS 2468 0.594 1.95 0.870 1 1 2 3
## p100 hist
## 1 4 ▇▇▁▆▁
## ****
## journal_board_analysis.tbl
## ****
## ── Data Summary ────────────────────────
## Values
## Name eval(i)
## Number of rows 15314
## Number of columns 9
## _______________________
## Column type frequency:
## character 2
## factor 1
## list 2
## logical 1
## numeric 3
## ________________________
## Group variables None
##
## ── Variable type: character ────────────────────────────────────────────────────
## skim_variable n_missing complete_rate min max empty n_unique whitespace
## 1 NM_JOURNAL 1 1.00 0 146 2 6079 0
## 2 CAT_PUBLISHER 0 1 4 35 0 17 0
##
## ── Variable type: factor ───────────────────────────────────────────────────────
## skim_variable n_missing complete_rate ordered n_unique
## 1 FAC_ROLE 1123 0.927 TRUE 3
## top_counts
## 1 edi: 5454, rev: 5129, chi: 3608
##
## ── Variable type: list ─────────────────────────────────────────────────────────
## skim_variable n_missing complete_rate n_unique min_length
## 1 LIST_SUBJECTS 5975 0.610 1312 1
## 2 LIST_ROLEGROUP_COUNTRIES 0 1 9645 1
## max_length
## 1 3
## 2 9179
##
## ── Variable type: logical ──────────────────────────────────────────────────────
## skim_variable n_missing complete_rate mean count
## 1 IND_OPEN 3276 0.786 0.138 FAL: 10380, TRU: 1658
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable n_missing complete_rate mean sd p0 p25 p50
## 1 N_SUBJECTS 5975 0.610 1.95 0.870 1 1 2
## 2 N_ROLEGROUP_COUNTRIES 0 1 6.66 8.48 1 1 3
## 3 PERCENT_ROLEGROUP_MALE 3104 0.797 0.701 0.300 0 0.538 0.75
## p75 p100 hist
## 1 3 4 ▇▇▁▆▁
## 2 10 106 ▇▁▁▁▁
## 3 1 1 ▂▁▃▅▇
library(DescTools)
#journal_analysis.tbl %>% select(-NM_JOURNAL) %>% Desc()
# For each symbol in explore.ls: print a banner with the table name, then a
# DescTools::Desc() description of that table with the NM_* columns and all
# list columns removed.
# The expression passed to Desc() is deliberately left as-is: Desc() prints
# the deparsed expression as its heading ("Describe eval(i) %>% select(...)"),
# so renaming `i` or introducing an intermediate variable would change the
# printed output.
for (i in explore.ls)
{
cat("****\n");
print(i);
cat("****\n");
print(Desc(
eval(i) %>% select(-starts_with("NM_"),-where(is.list))
))
}## ****
## editors_analysis.tbl
## ****
## ------------------------------------------------------------------------------
## Describe eval(i) %>% select(-starts_with("NM_"), -where(is.list)) (tbl_df, tbl, data.frame):
##
## data frame: 478562 obs. of 6 variables
## 212164 complete cases (44.3%)
##
## Nr ColName Class NAs Levels
## 1 CAT_PUBLISHER character .
## 2 IND_MALE logical 164093 (34.3%)
## 3 IND_OPEN logical 48009 (10.0%)
## 4 LS_COUNTRY character .
## 5 LS_SUBJECTS character 150584 (31.5%)
## 6 FAC_ROLE ordered, factor 9390 (2.0%) (3): 1-review,
## 2-editor, 3-chief
##
##
## ------------------------------------------------------------------------------
## 1 - CAT_PUBLISHER (character)
##
## length n NAs unique levels dupes
## 478'562 478'562 0 17 17 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 Frontiers Media 174'294 36.4% 174'294 36.4%
## 2 Elsevier 108'961 22.8% 283'255 59.2%
## 3 SAGE 56'136 11.7% 339'391 70.9%
## 4 MDPI 35'063 7.3% 374'454 78.2%
## 5 Emerald 18'486 3.9% 392'940 82.1%
## 6 Inderscience 16'961 3.5% 409'901 85.7%
## 7 Hindawi 13'424 2.8% 423'325 88.5%
## 8 Cambridge University Press 12'146 2.5% 435'471 91.0%
## 9 PLOS 10'643 2.2% 446'114 93.2%
## 10 IGI Global 9'921 2.1% 456'035 95.3%
## 11 Brill 5'961 1.2% 461'996 96.5%
## 12 American Psychological Association 3'740 0.8% 465'736 97.3%
## ... etc.
## [list output truncated]
## ------------------------------------------------------------------------------
## 2 - IND_MALE (logical - dichotomous)
##
## length n NAs unique
## 478'562 314'469 164'093 2
## 65.7% 34.3%
##
## freq perc lci.95 uci.95'
## FALSE 100'097 31.8% 31.7% 32.0%
## TRUE 214'372 68.2% 68.0% 68.3%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## 3 - IND_OPEN (logical - dichotomous)
##
## length n NAs unique
## 478'562 430'553 48'009 2
## 90.0% 10.0%
##
## freq perc lci.95 uci.95'
## FALSE 197'612 45.9% 45.7% 46.0%
## TRUE 232'941 54.1% 54.0% 54.3%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## 4 - LS_COUNTRY (character)
##
## length n NAs unique levels dupes
## 478'562 478'562 0 191 191 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 US 133'814 28.0% 133'814 28.0%
## 2 GB 39'538 8.3% 173'352 36.2%
## 3 IT 31'812 6.6% 205'164 42.9%
## 4 CN 27'418 5.7% 232'582 48.6%
## 5 20'778 4.3% 253'360 52.9%
## 6 DE 19'935 4.2% 273'295 57.1%
## 7 AU 17'841 3.7% 291'136 60.8%
## 8 FR 16'468 3.4% 307'604 64.3%
## 9 ES 16'154 3.4% 323'758 67.7%
## 10 CA 15'782 3.3% 339'540 71.0%
## 11 JP 11'786 2.5% 351'326 73.4%
## 12 IN 9'928 2.1% 361'254 75.5%
## ... etc.
## [list output truncated]
## ------------------------------------------------------------------------------
## 5 - LS_SUBJECTS (character)
##
## length n NAs unique levels dupes
## 478'562 327'978 150'584 1'312 1'312 y
## 68.5% 31.5%
##
## level freq perc cumfreq cumperc
## 1 MD 16'328 5.0% 16'328 5.0%
## 2 1109,1701,1702 13'227 4.0% 29'555 9.0%
## 3 1701,1702 12'719 3.9% 42'274 12.9%
## 4 1115 8'318 2.5% 50'592 15.4%
## 5 0606,1116,1701 8'218 2.5% 58'810 17.9%
## 6 0604,1103,1801 7'698 2.3% 66'508 20.3%
## 7 1112 7'575 2.3% 74'083 22.6%
## 8 0502,0503,0605 6'990 2.1% 81'073 24.7%
## 9 1103 6'910 2.1% 87'983 26.8%
## 10 1107,1108 6'861 2.1% 94'844 28.9%
## 11 0607 6'418 2.0% 101'262 30.9%
## 12 1503 5'982 1.8% 107'244 32.7%
## ... etc.
## [list output truncated]
## ------------------------------------------------------------------------------
## 6 - FAC_ROLE (ordered, factor)
##
## length n NAs unique levels dupes
## 478'562 469'172 9'390 3 3 y
## 98.0% 2.0%
##
## level freq perc cumfreq cumperc
## 1 review 317'646 67.7% 317'646 67.7%
## 2 editor 145'296 31.0% 462'942 98.7%
## 3 chief 6'230 1.3% 469'172 100.0%
## ****
## journal_analysis.tbl
## ****
## ------------------------------------------------------------------------------
## Describe eval(i) %>% select(-starts_with("NM_"), -where(is.list)) (tbl_df, tbl, data.frame):
##
## data frame: 6080 obs. of 4 variables
## 3598 complete cases (59.2%)
##
## Nr ColName Class NAs Levels
## 1 FAC_ROLE ordered, factor 45 (0.7%) (3): 1-review,
## 2-editor, 3-chief
## 2 CAT_PUBLISHER character .
## 3 N_SUBJECTS numeric 2468 (40.6%)
## 4 IND_OPEN logical 1289 (21.2%)
##
##
## ------------------------------------------------------------------------------
## 1 - FAC_ROLE (ordered, factor)
##
## length n NAs unique levels dupes
## 6'080 6'035 45 3 3 y
## 99.3% 0.7%
##
## level freq perc cumfreq cumperc
## 1 review 5'129 85.0% 5'129 85.0%
## 2 editor 872 14.4% 6'001 99.4%
## 3 chief 34 0.6% 6'035 100.0%
## ------------------------------------------------------------------------------
## 2 - CAT_PUBLISHER (character)
##
## length n NAs unique levels dupes
## 6'080 6'080 0 17 17 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 Elsevier 2'134 35.1% 2'134 35.1%
## 2 SAGE 1'191 19.6% 3'325 54.7%
## 3 Inderscience 470 7.7% 3'795 62.4%
## 4 Cambridge University Press 398 6.5% 4'193 69.0%
## 5 Emerald 370 6.1% 4'563 75.0%
## 6 Brill 279 4.6% 4'842 79.6%
## 7 MDPI 274 4.5% 5'116 84.1%
## 8 Hindawi 220 3.6% 5'336 87.8%
## 9 IGI Global 220 3.6% 5'556 91.4%
## 10 Pleiades 115 1.9% 5'671 93.3%
## 11 Karger 99 1.6% 5'770 94.9%
## 12 Frontiers Media 92 1.5% 5'862 96.4%
## ... etc.
## [list output truncated]
## ------------------------------------------------------------------------------
## 3 - N_SUBJECTS (numeric)
##
## length n NAs unique 0s mean meanCI'
## 6'080 3'612 2'468 4 0 1.95 1.92
## 59.4% 40.6% 0.0% 1.98
##
## .05 .10 .25 median .75 .90 .95
## 1.00 1.00 1.00 2.00 3.00 3.00 3.00
##
## range sd vcoef mad IQR skew kurt
## 3.00 0.87 0.45 1.48 2.00 0.35 -1.04
##
##
## level freq perc cumfreq cumperc
## 1 1 1'363 37.7% 1'363 37.7%
## 2 2 1'166 32.3% 2'529 70.0%
## 3 3 984 27.2% 3'513 97.3%
## 4 4 99 2.7% 3'612 100.0%
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 4 - IND_OPEN (logical - dichotomous)
##
## length n NAs unique
## 6'080 4'791 1'289 2
## 78.8% 21.2%
##
## freq perc lci.95 uci.95'
## FALSE 4'000 83.5% 82.4% 84.5%
## TRUE 791 16.5% 15.5% 17.6%
##
## ' 95%-CI (Wilson)
## ****
## journal_board_analysis.tbl
## ****
## ------------------------------------------------------------------------------
## Describe eval(i) %>% select(-starts_with("NM_"), -where(is.list)) (tbl_df, tbl, data.frame):
##
## data frame: 15314 obs. of 6 variables
## 6666 complete cases (43.5%)
##
## Nr ColName Class NAs Levels
## 1 FAC_ROLE ordered, factor 1123 (7.3%) (3):
## 1-review,
## 2-editor,
## 3-chief
## 2 CAT_PUBLISHER character .
## 3 N_SUBJECTS numeric 5975 (39.0%)
## 4 IND_OPEN logical 3276 (21.4%)
## 5 N_ROLEGROUP_COUNTRIES integer .
## 6 PERCENT_ROLEGROUP_MALE numeric 3104 (20.3%)
##
##
## ------------------------------------------------------------------------------
## 1 - FAC_ROLE (ordered, factor)
##
## length n NAs unique levels dupes
## 15'314 14'191 1'123 3 3 y
## 92.7% 7.3%
##
## level freq perc cumfreq cumperc
## 1 review 5'129 36.1% 5'129 36.1%
## 2 editor 5'454 38.4% 10'583 74.6%
## 3 chief 3'608 25.4% 14'191 100.0%
## ------------------------------------------------------------------------------
## 2 - CAT_PUBLISHER (character)
##
## length n NAs unique levels dupes
## 15'314 15'314 0 17 17 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 Elsevier 5'814 38.0% 5'814 38.0%
## 2 SAGE 2'890 18.9% 8'704 56.8%
## 3 Inderscience 1'310 8.6% 10'014 65.4%
## 4 Emerald 1'289 8.4% 11'303 73.8%
## 5 Cambridge University Press 879 5.7% 12'182 79.5%
## 6 Brill 629 4.1% 12'811 83.7%
## 7 MDPI 536 3.5% 13'347 87.2%
## 8 IGI Global 406 2.7% 13'753 89.8%
## 9 Pleiades 344 2.2% 14'097 92.1%
## 10 Hindawi 302 2.0% 14'399 94.0%
## 11 Karger 257 1.7% 14'656 95.7%
## 12 John Benjamins 203 1.3% 14'859 97.0%
## ... etc.
## [list output truncated]
## ------------------------------------------------------------------------------
## 3 - N_SUBJECTS (numeric)
##
## length n NAs unique 0s mean meanCI'
## 15'314 9'339 5'975 4 0 1.95 1.93
## 61.0% 39.0% 0.0% 1.97
##
## .05 .10 .25 median .75 .90 .95
## 1.00 1.00 1.00 2.00 3.00 3.00 3.00
##
## range sd vcoef mad IQR skew kurt
## 3.00 0.87 0.45 1.48 2.00 0.35 -1.04
##
##
## level freq perc cumfreq cumperc
## 1 1 3'541 37.9% 3'541 37.9%
## 2 2 3'000 32.1% 6'541 70.0%
## 3 3 2'544 27.2% 9'085 97.3%
## 4 4 254 2.7% 9'339 100.0%
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 4 - IND_OPEN (logical - dichotomous)
##
## length n NAs unique
## 15'314 12'038 3'276 2
## 78.6% 21.4%
##
## freq perc lci.95 uci.95'
## FALSE 10'380 86.2% 85.6% 86.8%
## TRUE 1'658 13.8% 13.2% 14.4%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## 5 - N_ROLEGROUP_COUNTRIES (integer)
##
## length n NAs unique 0s mean meanCI'
## 15'314 15'314 0 85 0 6.66 6.52
## 100.0% 0.0% 0.0% 6.79
##
## .05 .10 .25 median .75 .90 .95
## 1.00 1.00 1.00 3.00 10.00 17.00 21.00
##
## range sd vcoef mad IQR skew kurt
## 105.00 8.48 1.27 2.97 9.00 3.30 18.65
##
## lowest : 1 (5'122), 2 (1'924), 3 (1'042), 4 (743), 5 (639)
## highest: 89, 90 (2), 91, 100, 106
##
## heap(?): remarkable frequency (33.4%) for the mode(s) (= 1)
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 6 - PERCENT_ROLEGROUP_MALE (numeric)
##
## length n NAs unique 0s mean meanCI'
## 15'314 12'210 3'104 986 1'082 0.7005036 0.6951806
## 79.7% 20.3% 7.1% 0.7058267
##
## .05 .10 .25 median .75 .90 .95
## 0.0000000 0.2500000 0.5384615 0.7500000 1.0000000 1.0000000 1.0000000
##
## range sd vcoef mad IQR skew kurt
## 1.0000000 0.3000711 0.4283648 0.3706500 0.4615385 -0.9913193 0.1952624
##
## lowest : 0.0 (1'082), 0.0526316, 0.0666667, 0.0833333 (2), 0.0909091
## highest: 0.9722222, 0.9736842, 0.975, 0.9787234, 1.0 (3'673)
##
## heap(?): remarkable frequency (30.1%) for the mode(s) (= 1)
##
## ' 95%-CI (classic)
library(corrr)
# Pairwise association between every pair of columns of a data frame.
#
# The measure is chosen from the measurement level of the two columns:
#   * nominal vs nominal / ordinal  -> Cramer's V (DescTools::CramerV)
#   * numeric vs numeric            -> cor(), method = cor_method_numeric
#   * ordinal vs ordinal            -> cor() on integer codes,
#                                      method = cor_method_ordinal
#   * numeric vs nominal / ordinal  -> sqrt(R^2) of lm(numeric ~ other)
#
# Adopted from https://stackoverflow.com/a/52557631/590437
# -- extended by Micah Altman to detect ordered factors, and to use DescTools
#    rather than rcompanion
#
# Arguments:
#   df                   data frame whose columns are compared pairwise.
#   cor_method_numeric   `method` passed to cor() for numeric pairs.
#   cor_method_ordinal   `method` passed to cor() for ordinal pairs.
#   adjust_cramersv_bias passed as `correct` to CramerV(); see
#                        https://rdrr.io/cran/rcompanion/man/cramerV.html
#
# Returns one row per ordered pair (including each column with itself) with
# columns x, y, assoc, type, complete_obs_pairs and complete_obs_ratio.
# Pairs involving a column of an unsupported type raise a warning and
# contribute no row.
mixed_assoc <- function(df, cor_method_numeric = "pearson",
                        cor_method_ordinal = "kendall",
                        adjust_cramersv_bias = TRUE) {
  # Classify a column as "ordinal", "nominal" or "numeric" (NA if none).
  # Uses is.*() predicates rather than `class(v) %in% ...`: the latter yields
  # a length-2 logical for ordered factors (class c("ordered", "factor")),
  # which `&&` rejects in R >= 4.3.
  var_kind <- function(v) {
    if (is.ordered(v) || is.logical(v)) {
      "ordinal"
    } else if (is.factor(v) || is.character(v)) {
      "nominal"
    } else if (is.integer(v) || is.double(v)) {
      "numeric"
    } else {
      NA_character_
    }
  }

  # Association for a single (xName, yName) pair; NULL if unsupported.
  assoc_pair <- function(xName, yName) {
    x <- df[[xName]]
    y <- df[[yName]]
    kinds <- c(var_kind(x), var_kind(y))

    if (anyNA(kinds)) {
      warn(paste(
        "unmatched column type combination: ",
        paste(class(x), collapse = "/"),
        paste(class(y), collapse = "/")
      ))
      return(NULL)
    }

    if (all(kinds == "numeric")) {
      assoc <- cor(x, y, method = cor_method_numeric, use = "na.or.complete")
      type <- cor_method_numeric
    } else if (all(kinds == "ordinal")) {
      assoc <- cor(as.integer(x), as.integer(y),
                   method = cor_method_ordinal, use = "na.or.complete")
      type <- cor_method_ordinal
    } else if ("numeric" %in% kinds) {
      # numeric vs nominal/ordinal: the numeric column is always the response.
      # from https://stats.stackexchange.com/questions/119835/correlation-between-a-nominal-iv-and-a-continuous-dv-variable/124618#124618
      fit <- if (kinds[1] == "numeric") lm(x ~ y) else lm(y ~ x)
      # NOTE: the stored value is sqrt(R^2) although the label says "rsq".
      assoc <- sqrt(summary(fit)$r.squared)
      type <- "rsq"
    } else {
      # nominal vs nominal, or nominal vs ordinal in either order
      assoc <- CramerV(as.character(x), as.character(y),
                       correct = adjust_cramersv_bias)
      type <- "cramersV"
    }

    # add complete obs number and ratio to the result row
    n_complete <- sum(!is.na(x) & !is.na(y))
    data.frame(
      x = xName,
      y = yName,
      assoc = assoc,
      type = type,
      complete_obs_pairs = n_complete,
      complete_obs_ratio = n_complete / length(x),
      stringsAsFactors = FALSE
    )
  }

  # apply to each ordered variable combination (first name varies fastest)
  pairs <- expand.grid(X1 = names(df), X2 = names(df),
                       stringsAsFactors = FALSE)
  map2_df(pairs$X1, pairs$X2, assoc_pair)
}

# For every table referenced in explore.ls: print its name, compute the
# association table on a random sample of up to 1000 rows (excluding NM_*
# and list columns), print it, and attempt a correlation network plot.
for (i in explore.ls) {
  cat("****\n")
  print(i)
  cat("****\n")

  assoc.tbl <- eval(i) %>%
    select(-starts_with("NM_"), -where(is.list)) %>%
    slice_sample(n = 1000) %>%
    mixed_assoc(df = .)
  print(assoc.tbl)

  # Best effort: rows with a missing association are dropped before
  # reshaping, which can leave NA cells in the matrix and make the plot
  # fail; try() reports the error and lets the loop continue.
  try({
    assoc.tbl %>%
      select(x, y, assoc) %>%
      na.omit %>%
      spread(y, assoc) %>%
      column_to_rownames("x") %>%
      as.matrix %>%
      as_cordf %>%
      network_plot() %>%
      print
  })
}
## ****
## editors_analysis.tbl
## ****
## x y assoc type complete_obs_pairs
## 1 CAT_PUBLISHER CAT_PUBLISHER 1.000000000 cramersV 1000
## 2 IND_MALE CAT_PUBLISHER NaN cramersV 660
## 3 IND_OPEN CAT_PUBLISHER 0.878409719 cramersV 906
## 4 LS_COUNTRY CAT_PUBLISHER 0.223342363 cramersV 1000
## 5 LS_SUBJECTS CAT_PUBLISHER 0.574557454 cramersV 701
## 6 FAC_ROLE CAT_PUBLISHER 0.250634162 cramersV 981
## 7 CAT_PUBLISHER IND_MALE NaN cramersV 660
## 8 IND_MALE IND_MALE 1.000000000 kendall 660
## 9 IND_OPEN IND_MALE -0.031760462 kendall 605
## 10 LS_COUNTRY IND_MALE NaN cramersV 660
## 11 LS_SUBJECTS IND_MALE NaN cramersV 456
## 12 FAC_ROLE IND_MALE 0.006753203 kendall 647
## 13 CAT_PUBLISHER IND_OPEN 0.878409719 cramersV 906
## 14 IND_MALE IND_OPEN -0.031760462 kendall 605
## 15 IND_OPEN IND_OPEN 1.000000000 kendall 906
## 16 LS_COUNTRY IND_OPEN NaN cramersV 906
## 17 LS_SUBJECTS IND_OPEN 0.610196778 cramersV 701
## 18 FAC_ROLE IND_OPEN -0.113641313 kendall 892
## 19 CAT_PUBLISHER LS_COUNTRY 0.223342363 cramersV 1000
## 20 IND_MALE LS_COUNTRY NaN cramersV 660
## 21 IND_OPEN LS_COUNTRY NaN cramersV 906
## 22 LS_COUNTRY LS_COUNTRY 1.000000000 cramersV 1000
## 23 LS_SUBJECTS LS_COUNTRY NaN cramersV 701
## 24 FAC_ROLE LS_COUNTRY 0.000000000 cramersV 981
## 25 CAT_PUBLISHER LS_SUBJECTS 0.574557454 cramersV 701
## 26 IND_MALE LS_SUBJECTS NaN cramersV 456
## 27 IND_OPEN LS_SUBJECTS 0.610196778 cramersV 701
## 28 LS_COUNTRY LS_SUBJECTS NaN cramersV 701
## 29 LS_SUBJECTS LS_SUBJECTS 1.000000000 cramersV 701
## 30 FAC_ROLE LS_SUBJECTS NaN cramersV 688
## 31 CAT_PUBLISHER FAC_ROLE 0.250634162 cramersV 981
## 32 IND_MALE FAC_ROLE 0.006753203 kendall 647
## 33 IND_OPEN FAC_ROLE -0.113641313 kendall 892
## 34 LS_COUNTRY FAC_ROLE 0.000000000 cramersV 981
## 35 LS_SUBJECTS FAC_ROLE NaN cramersV 688
## 36 FAC_ROLE FAC_ROLE 1.000000000 kendall 981
## complete_obs_ratio
## 1 1.000
## 2 0.660
## 3 0.906
## 4 1.000
## 5 0.701
## 6 0.981
## 7 0.660
## 8 0.660
## 9 0.605
## 10 0.660
## 11 0.456
## 12 0.647
## 13 0.906
## 14 0.605
## 15 0.906
## 16 0.906
## 17 0.701
## 18 0.892
## 19 1.000
## 20 0.660
## 21 0.906
## 22 1.000
## 23 0.701
## 24 0.981
## 25 0.701
## 26 0.456
## 27 0.701
## 28 0.701
## 29 0.701
## 30 0.688
## 31 0.981
## 32 0.647
## 33 0.892
## 34 0.981
## 35 0.688
## 36 0.981
## Error in stats::cmdscale(distance, k = 2) : NA values not allowed in 'd'
## ****
## journal_analysis.tbl
## ****
## x y assoc type complete_obs_pairs
## 1 FAC_ROLE FAC_ROLE 1.00000000 kendall 989
## 2 CAT_PUBLISHER FAC_ROLE 0.13983563 cramersV 989
## 3 N_SUBJECTS FAC_ROLE 0.03841319 rsq 572
## 4 IND_OPEN FAC_ROLE -0.10199771 kendall 777
## 5 FAC_ROLE CAT_PUBLISHER 0.13983563 cramersV 989
## 6 CAT_PUBLISHER CAT_PUBLISHER 1.00000000 cramersV 1000
## 7 N_SUBJECTS CAT_PUBLISHER 0.22701929 rsq 577
## 8 IND_OPEN CAT_PUBLISHER 0.69462192 cramersV 782
## 9 FAC_ROLE N_SUBJECTS 0.03841319 rsq 572
## 10 CAT_PUBLISHER N_SUBJECTS 0.22701929 rsq 577
## 11 N_SUBJECTS N_SUBJECTS 1.00000000 pearson 577
## 12 IND_OPEN N_SUBJECTS 0.08991528 rsq 577
## 13 FAC_ROLE IND_OPEN -0.10199771 kendall 777
## 14 CAT_PUBLISHER IND_OPEN 0.69462192 cramersV 782
## 15 N_SUBJECTS IND_OPEN 0.08991528 rsq 577
## 16 IND_OPEN IND_OPEN 1.00000000 kendall 782
## complete_obs_ratio
## 1 0.989
## 2 0.989
## 3 0.572
## 4 0.777
## 5 0.989
## 6 1.000
## 7 0.577
## 8 0.782
## 9 0.572
## 10 0.577
## 11 0.577
## 12 0.577
## 13 0.777
## 14 0.782
## 15 0.577
## 16 0.782
## ****
## journal_board_analysis.tbl
## ****
## x y assoc type
## 1 FAC_ROLE FAC_ROLE 1.00000000 kendall
## 2 CAT_PUBLISHER FAC_ROLE 0.16262126 cramersV
## 3 N_SUBJECTS FAC_ROLE 0.04831314 rsq
## 4 IND_OPEN FAC_ROLE -0.10916146 kendall
## 5 N_ROLEGROUP_COUNTRIES FAC_ROLE 0.47226446 rsq
## 6 PERCENT_ROLEGROUP_MALE FAC_ROLE 0.22983142 rsq
## 7 FAC_ROLE CAT_PUBLISHER 0.16262126 cramersV
## 8 CAT_PUBLISHER CAT_PUBLISHER 1.00000000 cramersV
## 9 N_SUBJECTS CAT_PUBLISHER 0.25507993 rsq
## 10 IND_OPEN CAT_PUBLISHER 0.63317797 cramersV
## 11 N_ROLEGROUP_COUNTRIES CAT_PUBLISHER 0.73919468 rsq
## 12 PERCENT_ROLEGROUP_MALE CAT_PUBLISHER 0.25465060 rsq
## 13 FAC_ROLE N_SUBJECTS 0.04831314 rsq
## 14 CAT_PUBLISHER N_SUBJECTS 0.25507993 rsq
## 15 N_SUBJECTS N_SUBJECTS 1.00000000 pearson
## 16 IND_OPEN N_SUBJECTS 0.01520115 rsq
## 17 N_ROLEGROUP_COUNTRIES N_SUBJECTS 0.04068278 pearson
## 18 PERCENT_ROLEGROUP_MALE N_SUBJECTS -0.02242082 pearson
## 19 FAC_ROLE IND_OPEN -0.10916146 kendall
## 20 CAT_PUBLISHER IND_OPEN 0.63317797 cramersV
## 21 N_SUBJECTS IND_OPEN 0.01520115 rsq
## 22 IND_OPEN IND_OPEN 1.00000000 kendall
## 23 N_ROLEGROUP_COUNTRIES IND_OPEN 0.33525753 rsq
## 24 PERCENT_ROLEGROUP_MALE IND_OPEN 0.05062224 rsq
## 25 FAC_ROLE N_ROLEGROUP_COUNTRIES 0.47226446 rsq
## 26 CAT_PUBLISHER N_ROLEGROUP_COUNTRIES 0.73919468 rsq
## 27 N_SUBJECTS N_ROLEGROUP_COUNTRIES 0.04068278 pearson
## 28 IND_OPEN N_ROLEGROUP_COUNTRIES 0.33525753 rsq
## 29 N_ROLEGROUP_COUNTRIES N_ROLEGROUP_COUNTRIES 1.00000000 pearson
## 30 PERCENT_ROLEGROUP_MALE N_ROLEGROUP_COUNTRIES 0.02723793 pearson
## 31 FAC_ROLE PERCENT_ROLEGROUP_MALE 0.22983142 rsq
## 32 CAT_PUBLISHER PERCENT_ROLEGROUP_MALE 0.25465060 rsq
## 33 N_SUBJECTS PERCENT_ROLEGROUP_MALE -0.02242082 pearson
## 34 IND_OPEN PERCENT_ROLEGROUP_MALE 0.05062224 rsq
## 35 N_ROLEGROUP_COUNTRIES PERCENT_ROLEGROUP_MALE 0.02723793 pearson
## 36 PERCENT_ROLEGROUP_MALE PERCENT_ROLEGROUP_MALE 1.00000000 pearson
## complete_obs_pairs complete_obs_ratio
## 1 930 0.930
## 2 930 0.930
## 3 572 0.572
## 4 733 0.733
## 5 930 0.930
## 6 737 0.737
## 7 930 0.930
## 8 1000 1.000
## 9 619 0.619
## 10 792 0.792
## 11 1000 1.000
## 12 793 0.793
## 13 572 0.572
## 14 619 0.619
## 15 619 0.619
## 16 619 0.619
## 17 619 0.619
## 18 469 0.469
## 19 733 0.733
## 20 792 0.792
## 21 619 0.619
## 22 792 0.792
## 23 792 0.792
## 24 615 0.615
## 25 930 0.930
## 26 1000 1.000
## 27 619 0.619
## 28 792 0.792
## 29 1000 1.000
## 30 793 0.793
## 31 737 0.737
## 32 793 0.793
## 33 469 0.469
## 34 615 0.615
## 35 793 0.793
## 36 793 0.793
This method is intended for aggregate analysis and coarse (binary) classification and not for individual-level analysis – e.g. the assignment of a pronoun to a specific author. The classification reported in the table is based on the IPUMS corpus. Bootstrap resampling is used to compute confidence intervals – this reflects sampling error, but not measurement error arising from heuristic name extraction or uncertainty in name-to-gender assignment. As a sensitivity check for measurement error we replicated our analyses using two other methods: the Social Security Administration database and the ‘Kantrowitz’ method (which is popular in the literature, but based on a much smaller corpus). The range of estimates across these methods does not alter the overall substantive conclusions reported above.
# Start the full editor table from editors_parse_c.tbl.
editors_full.tbl <- editors_parse_c.tbl
# Outside debug runs, drop the editors_parse.tbl intermediate.
if (!doc_debug) rm(list = "editors_parse.tbl")
## role coding
# Build a lookup with one row per distinct `role` string, classify it, and
# join the classification back onto editors_full.tbl.
#   rolec           title-cased role, used for all pattern matching
#   CAT_ROLE_FORMER TRUE when the role mentions Former / Past / Emerit*
#   CAT_ROLE        "chief", "editor", "review", or "" when nothing matches
# str_detect() and case_when() are vectorised, so the lookup is computed in
# one ungrouped mutate() instead of row by row.
role.tbl <- editors_full.tbl %>%
  select(role) %>%
  group_by(role) %>%
  count() %>%
  ungroup() %>%
  mutate(
    rolec = str_to_title(role),
    CAT_ROLE_FORMER = str_detect(rolec, "(Former)|(Past)|(Emerit)"),
    # Rules are tried top to bottom and the first match wins, so the specific
    # titles must stay ahead of the generic single-word fallbacks.
    CAT_ROLE = case_when(
      is.na(rolec) ~ "",
      str_detect(rolec, "(In Chief)|(In-Chief)") ~ "chief",
      str_detect(rolec, "Founding Editor") ~ "chief",
      str_detect(rolec, "Associate Editor") ~ "editor",
      str_detect(rolec, "Assistant Editor") ~ "editor",
      str_detect(rolec, "Senior Editor") ~ "editor",
      str_detect(rolec, "Book Review") ~ "editor",
      str_detect(rolec, "Academic Editor") ~ "review",
      str_detect(rolec, "Review Editor") ~ "review",
      str_detect(rolec, "Editorial Board") ~ "review",
      str_detect(rolec, "Advisory Board") ~ "review",
      str_detect(rolec, "Advisory Committee") ~ "review",
      str_detect(rolec, "Scientific Committee") ~ "review",
      str_detect(rolec, "Scientific Advisor") ~ "review",
      # generic fallbacks
      str_detect(rolec, "Editor") ~ "editor",
      str_detect(rolec, "Advisory") ~ "review",
      str_detect(rolec, "Review") ~ "review",
      str_detect(rolec, "Board") ~ "review",
      str_detect(rolec, "Academic") ~ "review",
      str_detect(rolec, "Members") ~ "review",
      TRUE ~ ""
    )
  )

editors_full.tbl <- editors_full.tbl %>%
  left_join(
    role.tbl %>% select(role, CAT_ROLE, CAT_ROLE_FORMER),
    by = c("role")
  )
# the lookup is only needed for the join
rm(role.tbl)